Skip to content

Commit

Permalink
Merge branch 'aspicer/stickiness' of github.com:PostHog/posthog into …
Browse files Browse the repository at this point in the history
…aspicer/stickiness
  • Loading branch information
aspicer committed Nov 26, 2024
2 parents e994f37 + f1741ef commit bef27a2
Show file tree
Hide file tree
Showing 5 changed files with 319 additions and 0 deletions.
319 changes: 319 additions & 0 deletions ee/clickhouse/views/test/__snapshots__/test_clickhouse_stickiness.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -362,3 +362,322 @@
OFFSET 0
'''
# ---
# name: TestClickhouseStickiness.test_stickiness_all_time
'''
/* user_id:0 request:_snapshot_ */
SELECT timestamp
from events
WHERE team_id = 99999
AND timestamp > '2015-01-01'
order by timestamp
limit 1
'''
# ---
# name: TestClickhouseStickiness.test_stickiness_all_time.1
'''
/* user_id:0 request:_snapshot_ */
SELECT countDistinct(aggregation_target),
num_intervals
FROM
(SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target,
countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals
FROM events e
LEFT OUTER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 99999
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 99999
AND event = 'watched movie'
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 99999
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
AND event = 'watched movie'
GROUP BY aggregation_target)
WHERE num_intervals <= 9
GROUP BY num_intervals
ORDER BY num_intervals
'''
# ---
# name: TestClickhouseStickiness.test_stickiness_all_time_with_sampling
'''
/* user_id:0 request:_snapshot_ */
SELECT timestamp
from events
WHERE team_id = 99999
AND timestamp > '2015-01-01'
order by timestamp
limit 1
'''
# ---
# name: TestClickhouseStickiness.test_stickiness_all_time_with_sampling.1
'''
/* user_id:0 request:_snapshot_ */
SELECT countDistinct(aggregation_target),
num_intervals
FROM
(SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target,
countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals
FROM events e SAMPLE 1.0
LEFT OUTER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 99999
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 99999
AND event = 'watched movie'
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 99999
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
AND event = 'watched movie'
GROUP BY aggregation_target)
WHERE num_intervals <= 9
GROUP BY num_intervals
ORDER BY num_intervals
'''
# ---
# name: TestClickhouseStickiness.test_stickiness_hours
'''
/* user_id:0 request:_snapshot_ */
SELECT countDistinct(aggregation_target),
num_intervals
FROM
(SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target,
countDistinct(toStartOfHour(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals
FROM events e
LEFT OUTER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 99999
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 99999
AND event = 'watched movie'
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfHour(toDateTime('2020-01-01 12:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-01 20:00:00', 'UTC') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 99999
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfHour(toDateTime('2020-01-01 12:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-01 20:00:00', 'UTC')
AND event = 'watched movie'
GROUP BY aggregation_target)
WHERE num_intervals <= 10
GROUP BY num_intervals
ORDER BY num_intervals
'''
# ---
# name: TestClickhouseStickiness.test_stickiness_people_endpoint
'''
/* user_id:0 request:_snapshot_ */
SELECT DISTINCT aggregation_target AS actor_id
FROM
(SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target,
countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals
FROM events e
LEFT OUTER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 99999
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 99999
AND ((event = 'watched movie'))
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 99999
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
AND ((event = 'watched movie'))
GROUP BY aggregation_target)
WHERE num_intervals = 1
LIMIT 100
OFFSET 0
'''
# ---
# name: TestClickhouseStickiness.test_stickiness_people_paginated
'''
/* user_id:0 request:_snapshot_ */
SELECT DISTINCT aggregation_target AS actor_id
FROM
(SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target,
countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals
FROM events e
LEFT OUTER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 99999
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 99999
AND ((event = 'watched movie'))
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 99999
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
AND ((event = 'watched movie'))
GROUP BY aggregation_target)
WHERE num_intervals = 1
LIMIT 100
OFFSET 0
'''
# ---
# name: TestClickhouseStickiness.test_stickiness_people_paginated.1
'''
/* user_id:0 request:_snapshot_ */
SELECT DISTINCT aggregation_target AS actor_id
FROM
(SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target,
countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals
FROM events e
LEFT OUTER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 99999
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 99999
AND ((event = 'watched movie'))
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 99999
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
AND ((event = 'watched movie'))
GROUP BY aggregation_target)
WHERE num_intervals = 1
LIMIT 100
OFFSET 100
'''
# ---
# name: TestClickhouseStickiness.test_stickiness_with_person_on_events_v2
'''

SELECT DISTINCT person_id
FROM events
WHERE team_id = 99999
AND distinct_id = 'person2'
'''
# ---
# name: TestClickhouseStickiness.test_stickiness_with_person_on_events_v2.1
'''
/* user_id:0 request:_snapshot_ */
SELECT countDistinct(aggregation_target),
num_intervals
FROM
(SELECT if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) AS aggregation_target,
countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals
FROM events e
LEFT OUTER JOIN
(SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
person_distinct_id_overrides.distinct_id AS distinct_id
FROM person_distinct_id_overrides
WHERE equals(person_distinct_id_overrides.team_id, 99999)
GROUP BY person_distinct_id_overrides.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id
WHERE team_id = 99999
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
AND event = 'watched movie'
AND notEmpty(e.person_id)
GROUP BY aggregation_target)
WHERE num_intervals <= 9
GROUP BY num_intervals
ORDER BY num_intervals
'''
# ---
# name: TestClickhouseStickiness.test_timezones
'''

SELECT countDistinct(aggregation_target),
num_intervals
FROM
(SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target,
countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals
FROM events e
LEFT OUTER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 99999
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 99999
AND event = '$pageview'
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-15 23:59:59', 'UTC') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 99999
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-15 23:59:59', 'UTC')
AND event = '$pageview'
GROUP BY aggregation_target)
WHERE num_intervals <= 16
GROUP BY num_intervals
ORDER BY num_intervals
'''
# ---
# name: TestClickhouseStickiness.test_timezones.1
'''

SELECT countDistinct(aggregation_target),
num_intervals
FROM
(SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target,
countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'US/Pacific'))) as num_intervals
FROM events e
LEFT OUTER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 99999
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 99999
AND event = '$pageview'
AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'US/Pacific')), 'US/Pacific')
AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2021-05-15 23:59:59', 'US/Pacific') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 99999
AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'US/Pacific')), 'US/Pacific')
AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2021-05-15 23:59:59', 'US/Pacific')
AND event = '$pageview'
GROUP BY aggregation_target)
WHERE num_intervals <= 16
GROUP BY num_intervals
ORDER BY num_intervals
'''
# ---
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit bef27a2

Please sign in to comment.