-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
164 additions
and
67 deletions.
There are no files selected for viewing
26 changes: 26 additions & 0 deletions
26
...g/hogql_queries/insights/trends/test/__snapshots__/test_data_warehouse_query_builder.ambr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# name: TestDataWarehouseQueryBuilder.test_trends_data_warehouse | ||
' | ||
SELECT groupArray(day_start) AS date, | ||
groupArray(count) AS total | ||
FROM | ||
(SELECT sum(total) AS count, | ||
day_start AS day_start | ||
FROM | ||
(SELECT 0 AS total, | ||
minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start | ||
FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers | ||
UNION ALL SELECT 0 AS total, | ||
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start | ||
UNION ALL SELECT count(e.id) AS total, | ||
toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start | ||
FROM s3Cluster('posthog', 'http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, created DateTime64(3, \'UTC\')') AS e | ||
WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) | ||
GROUP BY day_start) | ||
GROUP BY day_start | ||
ORDER BY day_start ASC) | ||
ORDER BY sum(count) DESC | ||
LIMIT 100 SETTINGS readonly=2, | ||
max_execution_time=60, | ||
allow_experimental_object_type=1 | ||
' | ||
--- |
137 changes: 137 additions & 0 deletions
137
posthog/hogql_queries/insights/trends/test/test_data_warehouse_query_builder.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
from datetime import datetime | ||
from freezegun import freeze_time | ||
from posthog.hogql.modifiers import create_default_modifiers_for_team | ||
|
||
from posthog.hogql.query import execute_hogql_query | ||
from posthog.hogql.timings import HogQLTimings | ||
from posthog.hogql_queries.insights.trends.data_warehouse_trends_query_builder import DataWarehouseTrendsQueryBuilder | ||
from posthog.hogql_queries.utils.query_date_range import QueryDateRange | ||
from posthog.schema import ( | ||
DateRange, | ||
DataWarehouseNode, | ||
TrendsQuery, | ||
) | ||
from posthog.test.base import BaseTest | ||
from posthog.warehouse.models import DataWarehouseTable, DataWarehouseCredential | ||
|
||
from boto3 import resource | ||
from botocore.config import Config | ||
from posthog.settings import ( | ||
OBJECT_STORAGE_ACCESS_KEY_ID, | ||
OBJECT_STORAGE_BUCKET, | ||
OBJECT_STORAGE_ENDPOINT, | ||
OBJECT_STORAGE_SECRET_ACCESS_KEY, | ||
) | ||
import s3fs | ||
from pyarrow import parquet as pq | ||
import pyarrow as pa | ||
|
||
from posthog.test.base import ( | ||
ClickhouseTestMixin, | ||
snapshot_clickhouse_queries, | ||
) | ||
|
||
TEST_BUCKET = "test_storage_bucket-posthog.hogql.datawarehouse.trendquery" | ||
|
||
|
||
class TestDataWarehouseQueryBuilder(ClickhouseTestMixin, BaseTest): | ||
def teardown_method(self, method) -> None: | ||
s3 = resource( | ||
"s3", | ||
endpoint_url=OBJECT_STORAGE_ENDPOINT, | ||
aws_access_key_id=OBJECT_STORAGE_ACCESS_KEY_ID, | ||
aws_secret_access_key=OBJECT_STORAGE_SECRET_ACCESS_KEY, | ||
config=Config(signature_version="s3v4"), | ||
region_name="us-east-1", | ||
) | ||
bucket = s3.Bucket(OBJECT_STORAGE_BUCKET) | ||
bucket.objects.filter(Prefix=TEST_BUCKET).delete() | ||
|
||
def get_response(self, trends_query: TrendsQuery): | ||
query_date_range = QueryDateRange( | ||
date_range=trends_query.dateRange, | ||
team=self.team, | ||
interval=trends_query.interval, | ||
now=datetime.now(), | ||
) | ||
|
||
timings = HogQLTimings() | ||
modifiers = create_default_modifiers_for_team(self.team) | ||
|
||
query_builder = DataWarehouseTrendsQueryBuilder( | ||
trends_query=trends_query, | ||
team=self.team, | ||
query_date_range=query_date_range, | ||
series=trends_query.series[0], | ||
Check failure on line 65 in posthog/hogql_queries/insights/trends/test/test_data_warehouse_query_builder.py GitHub Actions / Python code quality checks
|
||
timings=timings, | ||
modifiers=modifiers, | ||
) | ||
|
||
query = query_builder.build_query() | ||
|
||
return execute_hogql_query( | ||
query_type="TrendsQuery", | ||
query=query, | ||
team=self.team, | ||
timings=timings, | ||
) | ||
|
||
def create_parquet_file(self): | ||
fs = s3fs.S3FileSystem( | ||
anon=False, | ||
use_ssl=False, | ||
client_kwargs={ | ||
"region_name": "us-east-1", | ||
"endpoint_url": OBJECT_STORAGE_ENDPOINT, | ||
"aws_access_key_id": OBJECT_STORAGE_ACCESS_KEY_ID, | ||
"aws_secret_access_key": OBJECT_STORAGE_SECRET_ACCESS_KEY, | ||
"verify": False, | ||
}, | ||
) | ||
|
||
path_to_s3_object = "s3://" + OBJECT_STORAGE_BUCKET + f"/{TEST_BUCKET}" | ||
|
||
id = pa.array(["1", "2", "3", "4"]) | ||
created = pa.array([datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 3), datetime(2023, 1, 4)]) | ||
names = ["id", "created"] | ||
|
||
pq.write_to_dataset( | ||
pa.Table.from_arrays([id, created], names=names), | ||
path_to_s3_object, | ||
filesystem=fs, | ||
use_dictionary=True, | ||
compression="snappy", | ||
version="2.0", | ||
) | ||
|
||
@snapshot_clickhouse_queries | ||
def test_trends_data_warehouse(self): | ||
self.create_parquet_file() | ||
credential = DataWarehouseCredential.objects.create( | ||
access_key=OBJECT_STORAGE_ACCESS_KEY_ID, access_secret=OBJECT_STORAGE_SECRET_ACCESS_KEY, team=self.team | ||
) | ||
|
||
DataWarehouseTable.objects.create( | ||
name="stripe_charges", | ||
url_pattern=f"http://host.docker.internal:19000/{OBJECT_STORAGE_BUCKET}/{TEST_BUCKET}/*.parquet", | ||
format=DataWarehouseTable.TableFormat.Parquet, | ||
team=self.team, | ||
columns={ | ||
"id": "String", | ||
"created": "DateTime64(3, 'UTC')", | ||
}, | ||
credential=credential, | ||
) | ||
|
||
trends_query = TrendsQuery( | ||
kind="TrendsQuery", | ||
dateRange=DateRange(date_from="2023-01-01"), | ||
series=[DataWarehouseNode(table_name="stripe_charges", id_field="id", timestamp_field="created")], | ||
) | ||
|
||
with freeze_time("2023-01-07"): | ||
response = self.get_response(trends_query=trends_query) | ||
|
||
assert response.columns is not None | ||
assert set(response.columns).issubset({"date", "total"}) | ||
assert response.results[0][1] == [1, 1, 1, 1, 0, 0, 0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters