Skip to content

Commit

Permalink
fix(data-warehouse): schema status optional type (#23379)
Browse files Browse the repository at this point in the history
* should be optional

* adjust test to account for null status

* Update query snapshots

* Update query snapshots

* func name

* Update query snapshots

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Dylan Martin <[email protected]>
  • Loading branch information
3 people authored Jul 2, 2024
1 parent 46d50b1 commit 27506a8
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -227,26 +227,97 @@
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.1
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
/* user_id:0 request:_snapshot_ */
SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value,
count(*) as count
FROM events e
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam')
GROUP BY value
ORDER BY count DESC, value DESC
LIMIT 26
OFFSET 0
'''
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.2
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
/* user_id:0 request:_snapshot_ */
SELECT countIf(steps = 1) step_1,
countIf(steps = 2) step_2,
avg(step_1_average_conversion_time_inner) step_1_average_conversion_time,
median(step_1_median_conversion_time_inner) step_1_median_conversion_time,
prop
FROM
(SELECT aggregation_target,
steps,
avg(step_1_conversion_time) step_1_average_conversion_time_inner,
median(step_1_conversion_time) step_1_median_conversion_time_inner ,
prop
FROM
(SELECT aggregation_target,
steps,
max(steps) over (PARTITION BY aggregation_target,
prop) as max_steps,
step_1_conversion_time ,
prop
FROM
(SELECT *,
if(latest_0 <= latest_1
AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps ,
if(isNotNull(latest_1)
AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
prop
FROM
(SELECT aggregation_target, timestamp, step_0,
latest_0,
step_1,
min(latest_1) over (PARTITION by aggregation_target,
prop
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 ,
if(has([['test'], ['control']], prop), prop, ['Other']) as prop
FROM
(SELECT *,
if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop
FROM
(SELECT e.timestamp as timestamp,
pdi.person_id as aggregation_target,
pdi.person_id as person_id,
if(event = '$pageview', 1, 0) as step_0,
if(step_0 = 1, timestamp, null) as latest_0,
if(event = '$pageleave', 1, 0) as step_1,
if(step_1 = 1, timestamp, null) as latest_1,
array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic,
prop_basic as prop,
argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam')
AND (step_0 = 1
OR step_1 = 1) )))
WHERE step_0 = 1 ))
GROUP BY aggregation_target,
steps,
prop
HAVING steps = max_steps)
GROUP BY prop
'''
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.3
Expand Down
4 changes: 2 additions & 2 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -2575,7 +2575,7 @@
"type": "string"
}
},
"required": ["id", "name", "should_sync", "incremental", "status"],
"required": ["id", "name", "should_sync", "incremental"],
"type": "object"
},
"DatabaseSchemaSource": {
Expand Down Expand Up @@ -5319,7 +5319,7 @@
"type": "object"
},
"NodeKind": {
"description": "PostHog Query Schema definition.\n\nThis file acts as the source of truth for:\n\n- frontend/src/queries/schema.json - generated from typescript via \"pnpm run generate:schema:json\"\n\n- posthog/schema.py - generated from json the above json via \"pnpm run generate:schema:python\"",
"description": "PostHog Query Schema definition.\n\nThis file acts as the source of truth for:\n\n- frontend/src/queries/schema.json - generated from typescript via \"pnpm run schema:build:json\"\n\n- posthog/schema.py - generated from json the above json via \"pnpm run schema:build:python\"",
"enum": [
"EventsNode",
"ActionsNode",
Expand Down
6 changes: 3 additions & 3 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ type integer = number
* This file acts as the source of truth for:
*
* - frontend/src/queries/schema.json
* - generated from typescript via "pnpm run generate:schema:json"
* - generated from typescript via "pnpm run schema:build:json"
*
* - posthog/schema.py
* - generated from json the above json via "pnpm run generate:schema:python"
* - generated from the above json via "pnpm run schema:build:python"
* */

export enum NodeKind {
Expand Down Expand Up @@ -1430,7 +1430,7 @@ export interface DatabaseSchemaSchema {
name: string
should_sync: boolean
incremental: boolean
status: string
status?: string
last_synced_at?: string
}

Expand Down
4 changes: 2 additions & 2 deletions posthog/hogql/database/test/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ def test_serialize_database_warehouse_table_source(self):
source=source,
table=warehouse_table,
should_sync=True,
status=ExternalDataSchema.Status.COMPLETED,
last_synced_at="2024-01-01",
# No status but should be completed because a data warehouse table already exists
)

database = create_hogql_database(team_id=self.team.pk)
Expand All @@ -183,7 +183,7 @@ def test_serialize_database_warehouse_table_source(self):
assert table.schema_.name == "table_1"
assert table.schema_.should_sync is True
assert table.schema_.incremental is False
assert table.schema_.status == "Completed"
assert table.schema_.status is None
assert table.schema_.last_synced_at == "2024-01-01 00:00:00+00:00"

field = table.fields.get("id")
Expand Down
2 changes: 1 addition & 1 deletion posthog/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ class DatabaseSchemaSchema(BaseModel):
last_synced_at: Optional[str] = None
name: str
should_sync: bool
status: str
status: Optional[str] = None


class DatabaseSchemaSource(BaseModel):
Expand Down

0 comments on commit 27506a8

Please sign in to comment.