From 92de99a8d2a6714c4de421c649cb811a985a6669 Mon Sep 17 00:00:00 2001 From: Eric Duong Date: Mon, 1 Jul 2024 20:09:09 -0400 Subject: [PATCH] fix(data-warehouse): schema status optional type (#23379) * should be optional * adjust test to account for null status * Update query snapshots * Update query snapshots * func name * Update query snapshots --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Dylan Martin --- .../test_clickhouse_experiments.ambr | 103 +++++++++++++++--- frontend/src/queries/schema.json | 4 +- frontend/src/queries/schema.ts | 6 +- posthog/hogql/database/test/test_database.py | 4 +- posthog/schema.py | 2 +- 5 files changed, 95 insertions(+), 24 deletions(-) diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr index 11e1e0317f71d4..389171ebc7e126 100644 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr +++ b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr @@ -227,26 +227,97 @@ # --- # name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.1 ''' - /* celery:posthog.tasks.tasks.sync_insight_caching_state */ - SELECT team_id, - date_diff('second', max(timestamp), now()) AS age - FROM events - WHERE timestamp > date_sub(DAY, 3, now()) - AND timestamp < now() - GROUP BY team_id - ORDER BY age; + /* user_id:0 request:_snapshot_ */ + SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value, + count(*) as count + FROM events e + WHERE team_id = 2 + AND event IN ['$pageleave', '$pageview'] + AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam') + AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 ''' # --- # name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.2 ''' - /* celery:posthog.tasks.tasks.sync_insight_caching_state */ - SELECT team_id, - date_diff('second', max(timestamp), now()) AS age - FROM events - WHERE timestamp > date_sub(DAY, 3, now()) - AND timestamp < now() - GROUP BY team_id - ORDER BY age; + /* user_id:0 request:_snapshot_ */ + SELECT countIf(steps = 1) step_1, + countIf(steps = 2) step_2, + avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) step_1_median_conversion_time, + prop + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner , + prop + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, + step_1_conversion_time , + prop + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , + if(has([['test'], ['control']], prop), prop, ['Other']) as prop + FROM + (SELECT *, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = '$pageview', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = '$pageleave', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['$pageleave', '$pageview'] + AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam') + AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 2 + AND event IN ['$pageleave', '$pageview'] + AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam') + AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') + AND (step_0 = 1 + OR step_1 = 1) ))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps, + prop + HAVING steps = max_steps) + GROUP BY prop ''' # --- # name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.3 diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 7811c9e1e43bb6..f00512ba8906e5 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -2575,7 +2575,7 @@ "type": "string" } }, - "required": ["id", "name", "should_sync", "incremental", "status"], + "required": ["id", "name", "should_sync", "incremental"], "type": "object" }, "DatabaseSchemaSource": { @@ -5319,7 +5319,7 @@ "type": "object" }, "NodeKind": { - "description": "PostHog Query Schema definition.\n\nThis file acts as the source of truth for:\n\n- frontend/src/queries/schema.json - generated from typescript via \"pnpm run generate:schema:json\"\n\n- posthog/schema.py - generated from json the above json via \"pnpm run generate:schema:python\"", + "description": "PostHog Query Schema definition.\n\nThis file acts as the source of truth for:\n\n- frontend/src/queries/schema.json - generated from typescript via \"pnpm run schema:build:json\"\n\n- posthog/schema.py - generated from json the above json via \"pnpm run schema:build:python\"", "enum": [ "EventsNode", "ActionsNode", diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index cfbddedd9ce928..e34100a494b756 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -41,10 +41,10 @@ type integer = number * This file acts as the source of truth for: * * - frontend/src/queries/schema.json - * - generated from typescript via "pnpm run generate:schema:json" + * - generated from typescript via "pnpm run schema:build:json" * * - posthog/schema.py - * - generated from json the above json via "pnpm run generate:schema:python" + * - generated from json the above json via "pnpm run schema:build:python" * */ export enum NodeKind { @@ -1430,7 +1430,7 @@ export interface DatabaseSchemaSchema { name: string should_sync: boolean incremental: boolean - status: string + status?: string last_synced_at?: string } diff --git a/posthog/hogql/database/test/test_database.py b/posthog/hogql/database/test/test_database.py index f91187d1e091e5..215677abe41bac 100644 --- a/posthog/hogql/database/test/test_database.py +++ b/posthog/hogql/database/test/test_database.py @@ -161,8 +161,8 @@ def test_serialize_database_warehouse_table_source(self): source=source, table=warehouse_table, should_sync=True, - status=ExternalDataSchema.Status.COMPLETED, last_synced_at="2024-01-01", + # No status but should be completed because a data warehouse table already exists ) database = create_hogql_database(team_id=self.team.pk) @@ -183,7 +183,7 @@ def test_serialize_database_warehouse_table_source(self): assert table.schema_.name == "table_1" assert table.schema_.should_sync is True assert table.schema_.incremental is False - assert table.schema_.status == "Completed" + assert table.schema_.status is None assert table.schema_.last_synced_at == "2024-01-01 00:00:00+00:00" field = table.fields.get("id") diff --git a/posthog/schema.py b/posthog/schema.py index 38e3643d3a3b67..c46df6df526993 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -241,7 +241,7 @@ class DatabaseSchemaSchema(BaseModel): last_synced_at: Optional[str] = None name: str should_sync: bool - status: str + status: Optional[str] = None class DatabaseSchemaSource(BaseModel):