Skip to content

Commit

Permalink
add test
Browse files Browse the repository at this point in the history
  • Loading branch information
EDsCODE committed Feb 13, 2024
1 parent 1b3dc17 commit 74b8056
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 67 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# name: TestDataWarehouseQueryBuilder.test_trends_data_warehouse
'
SELECT groupArray(day_start) AS date,
groupArray(count) AS total
FROM
(SELECT sum(total) AS count,
day_start AS day_start
FROM
(SELECT 0 AS total,
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers
UNION ALL SELECT 0 AS total,
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start
UNION ALL SELECT count(e.id) AS total,
toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start
FROM s3Cluster('posthog', 'http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, created DateTime64(3, \'UTC\')') AS e
WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0))
GROUP BY day_start)
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
LIMIT 100 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1
'
---
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from datetime import datetime
from freezegun import freeze_time
from posthog.hogql.modifiers import create_default_modifiers_for_team

from posthog.hogql.query import execute_hogql_query
from posthog.hogql.timings import HogQLTimings
from posthog.hogql_queries.insights.trends.data_warehouse_trends_query_builder import DataWarehouseTrendsQueryBuilder
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.schema import (
DateRange,
DataWarehouseNode,
TrendsQuery,
)
from posthog.test.base import BaseTest
from posthog.warehouse.models import DataWarehouseTable, DataWarehouseCredential

from boto3 import resource
from botocore.config import Config
from posthog.settings import (
OBJECT_STORAGE_ACCESS_KEY_ID,
OBJECT_STORAGE_BUCKET,
OBJECT_STORAGE_ENDPOINT,
OBJECT_STORAGE_SECRET_ACCESS_KEY,
)
import s3fs
from pyarrow import parquet as pq
import pyarrow as pa

from posthog.test.base import (
ClickhouseTestMixin,
snapshot_clickhouse_queries,
)

TEST_BUCKET = "test_storage_bucket-posthog.hogql.datawarehouse.trendquery"


class TestDataWarehouseQueryBuilder(ClickhouseTestMixin, BaseTest):
    """Integration tests for ``DataWarehouseTrendsQueryBuilder``.

    Writes a small parquet file to the object store (minio in CI), registers
    it as a ``DataWarehouseTable``, then runs a trends query against it and
    checks the per-day counts.
    """

    def teardown_method(self, method) -> None:
        """Delete every object this test wrote under TEST_BUCKET so state
        does not leak between test runs."""
        s3 = resource(
            "s3",
            endpoint_url=OBJECT_STORAGE_ENDPOINT,
            aws_access_key_id=OBJECT_STORAGE_ACCESS_KEY_ID,
            aws_secret_access_key=OBJECT_STORAGE_SECRET_ACCESS_KEY,
            config=Config(signature_version="s3v4"),
            region_name="us-east-1",
        )
        bucket = s3.Bucket(OBJECT_STORAGE_BUCKET)
        bucket.objects.filter(Prefix=TEST_BUCKET).delete()

    def get_response(self, trends_query: TrendsQuery):
        """Build the HogQL query for the first series of ``trends_query``
        and execute it against ClickHouse, returning the query response."""
        query_date_range = QueryDateRange(
            date_range=trends_query.dateRange,
            team=self.team,
            interval=trends_query.interval,
            now=datetime.now(),
        )

        timings = HogQLTimings()
        modifiers = create_default_modifiers_for_team(self.team)

        series = trends_query.series[0]
        # DataWarehouseTrendsQueryBuilder accepts only DataWarehouseNode;
        # TrendsQuery.series is typed EventsNode | ActionsNode |
        # DataWarehouseNode, so narrow the union here (this also satisfies
        # the mypy check that previously failed on this call).
        assert isinstance(series, DataWarehouseNode)

        query_builder = DataWarehouseTrendsQueryBuilder(
            trends_query=trends_query,
            team=self.team,
            query_date_range=query_date_range,
            series=series,
            timings=timings,
            modifiers=modifiers,
        )

        query = query_builder.build_query()

        return execute_hogql_query(
            query_type="TrendsQuery",
            query=query,
            team=self.team,
            timings=timings,
        )

    def create_parquet_file(self):
        """Write a 4-row parquet dataset (id, created) to the test bucket.

        Rows are dated 2023-01-01 .. 2023-01-04 so the trends assertion below
        expects one event per day for the first four days of the range.
        """
        fs = s3fs.S3FileSystem(
            anon=False,
            use_ssl=False,
            client_kwargs={
                "region_name": "us-east-1",
                "endpoint_url": OBJECT_STORAGE_ENDPOINT,
                "aws_access_key_id": OBJECT_STORAGE_ACCESS_KEY_ID,
                "aws_secret_access_key": OBJECT_STORAGE_SECRET_ACCESS_KEY,
                "verify": False,
            },
        )

        path_to_s3_object = "s3://" + OBJECT_STORAGE_BUCKET + f"/{TEST_BUCKET}"

        id = pa.array(["1", "2", "3", "4"])
        created = pa.array([datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 3), datetime(2023, 1, 4)])
        names = ["id", "created"]

        pq.write_to_dataset(
            pa.Table.from_arrays([id, created], names=names),
            path_to_s3_object,
            filesystem=fs,
            use_dictionary=True,
            compression="snappy",
            # NOTE(review): parquet format version "2.0" is deprecated in
            # newer pyarrow in favor of "2.4"/"2.6" — kept for compatibility
            # with the pinned pyarrow; confirm before upgrading.
            version="2.0",
        )

    @snapshot_clickhouse_queries
    def test_trends_data_warehouse(self):
        self.create_parquet_file()
        credential = DataWarehouseCredential.objects.create(
            access_key=OBJECT_STORAGE_ACCESS_KEY_ID, access_secret=OBJECT_STORAGE_SECRET_ACCESS_KEY, team=self.team
        )

        DataWarehouseTable.objects.create(
            name="stripe_charges",
            # host.docker.internal:19000 points ClickHouse (in docker) at the
            # local minio instance serving the bucket written above.
            url_pattern=f"http://host.docker.internal:19000/{OBJECT_STORAGE_BUCKET}/{TEST_BUCKET}/*.parquet",
            format=DataWarehouseTable.TableFormat.Parquet,
            team=self.team,
            columns={
                "id": "String",
                "created": "DateTime64(3, 'UTC')",
            },
            credential=credential,
        )

        trends_query = TrendsQuery(
            kind="TrendsQuery",
            dateRange=DateRange(date_from="2023-01-01"),
            series=[DataWarehouseNode(table_name="stripe_charges", id_field="id", timestamp_field="created")],
        )

        # Freeze "now" so the 7-day window is 2023-01-01 .. 2023-01-07.
        with freeze_time("2023-01-07"):
            response = self.get_response(trends_query=trends_query)

        assert response.columns is not None
        assert set(response.columns).issubset({"date", "total"})
        # One parquet row per day for Jan 1-4, nothing for Jan 5-7.
        assert response.results[0][1] == [1, 1, 1, 1, 0, 0, 0]
68 changes: 1 addition & 67 deletions posthog/hogql_queries/insights/trends/test/test_trends.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
Person,
)

from posthog.warehouse.models import DataWarehouseTable, DataWarehouseCredential

from posthog.models.group.util import create_group
from posthog.models.instance_setting import (
get_instance_setting,
Expand All @@ -48,7 +46,6 @@
ActionsNode,
BreakdownFilter,
DateRange,
DataWarehouseNode,
EventsNode,
PropertyGroupFilter,
SeriesType,
Expand Down Expand Up @@ -129,7 +126,6 @@ def convert_filter_to_trends_query(filter: Filter) -> TrendsQuery:

events: List[EventsNode] = []
actions: List[ActionsNode] = []
data_warehouse_entities: List[DataWarehouseNode] = []

for event in filter.events:
if isinstance(event._data.get("properties", None), List):
Expand Down Expand Up @@ -175,30 +171,7 @@ def convert_filter_to_trends_query(filter: Filter) -> TrendsQuery:
)
)

for entity in filter.data_warehouse_entities:
if isinstance(entity._data.get("properties", None), List):
properties = clean_entity_properties(entity._data.get("properties", None))
elif entity._data.get("properties", None) is not None:
values = entity._data.get("properties", None).get("values", None)
properties = clean_entity_properties(values)
else:
properties = None

data_warehouse_entities.append(
DataWarehouseNode(
table_name=entity.id,
id_field=entity.id_field,
timestamp_field=entity.timestamp_field,
custom_name=entity.custom_name,
math=entity.math,
math_property=entity.math_property,
math_hogql=entity.math_hogql,
math_group_type_index=entity.math_group_type_index,
properties=properties,
)
)

series: List[SeriesType] = [*events, *actions, *data_warehouse_entities]
series: List[SeriesType] = [*events, *actions]

tq = TrendsQuery(
series=series,
Expand Down Expand Up @@ -513,45 +486,6 @@ def test_trends_per_day(self):
self.assertEqual(response[0]["labels"][5], "2-Jan-2020")
self.assertEqual(response[0]["data"][5], 1.0)

@snapshot_clickhouse_queries
def test_trends_data_warehouse(self):
self._create_events()

credential = DataWarehouseCredential.objects.create(access_key="test", access_secret="test", team=self.team)
DataWarehouseTable.objects.create(
name="stripe_charges",
url_pattern="https://databeach-hackathon.s3.amazonaws.com/tim_test/test_events6.pqt",
format=DataWarehouseTable.TableFormat.Parquet,
team=self.team,
columns={
"id": "String",
"created": "DateTime64(3, 'UTC')",
"mrr": "Nullable(Int64)",
"offset": "UInt32",
},
credential=credential,
)

with freeze_time("2020-01-04T13:00:01Z"):
# with self.assertNumQueries(16):
self._run(
Filter(
team=self.team,
data={
"date_from": "-7d",
"data_warehouse_entities": [
{
"id": "stripe_charges",
"id_field": "id",
"timestamp_field": "created",
"type": "data_warehouse",
}
],
},
),
self.team,
)

# just make sure this doesn't error
def test_no_props(self):
PropertyDefinition.objects.create(
Expand Down

0 comments on commit 74b8056

Please sign in to comment.