Skip to content

Commit

Permalink
fix: rolling retention (PostHog#24926)
Browse files Browse the repository at this point in the history
  • Loading branch information
aspicer authored and abhi12299 committed Sep 13, 2024
1 parent b0e018c commit 0a71651
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 66 deletions.
42 changes: 15 additions & 27 deletions frontend/src/scenes/retention/RetentionModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ export function RetentionModal(): JSX.Element | null {
const { results } = useValues(retentionLogic(insightProps))
const { people, peopleLoading, peopleLoadingMore } = useValues(retentionPeopleLogic(insightProps))
const { loadMorePeople } = useActions(retentionPeopleLogic(insightProps))
const { aggregationTargetLabel, selectedInterval, exploreUrl, actorsQuery, retentionFilter } = useValues(
const { aggregationTargetLabel, selectedInterval, exploreUrl, actorsQuery } = useValues(
retentionModalLogic(insightProps)
)
const { closeModal } = useActions(retentionModalLogic(insightProps))
Expand Down Expand Up @@ -111,32 +111,20 @@ export function RetentionModal(): JSX.Element | null {
<tbody>
<tr>
<th>{capitalizeFirstLetter(aggregationTargetLabel.singular)}</th>
{row.values?.map((data: any, index: number) => {
let cumulativeCount = data.count
if (retentionFilter?.cumulative) {
for (let i = index + 1; i < row.values.length; i++) {
cumulativeCount += row.values[i].count
}
cumulativeCount = Math.min(cumulativeCount, row.values[0].count)
}
const percentageValue =
row.values[0].count > 0 ? cumulativeCount / row.values[0].count : 0

return (
<th key={index}>
<div>{results[index].label}</div>
<div>
{cumulativeCount}
&nbsp;
{cumulativeCount > 0 && (
<span className="text-muted">
({percentage(percentageValue)})
</span>
)}
</div>
</th>
)
})}
{row.values?.map((data: any, index: number) => (
<th key={index}>
<div>{results[index].label}</div>
<div>
{data.count}
&nbsp;
{data.count > 0 && (
<span className="text-muted">
({percentage(data.count / row?.values[0]['count'])})
</span>
)}
</div>
</th>
))}
</tr>
{people.result &&
people.result.map((personAppearances: RetentionTableAppearanceType) => (
Expand Down
14 changes: 2 additions & 12 deletions frontend/src/scenes/retention/retentionLineGraphLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export const retentionLineGraphLogic = kea<retentionLineGraphLogicType>([
trendSeries: [
(s) => [s.results, s.retentionFilter],
(results, retentionFilter): RetentionTrendPayload[] => {
const { period, retentionReference, cumulative } = retentionFilter || {}
const { period, retentionReference } = retentionFilter || {}
// If the retention reference option is specified as previous,
// then translate retention rates to relative to previous,
// otherwise, just use what the result was originally.
Expand All @@ -47,20 +47,10 @@ export const retentionLineGraphLogic = kea<retentionLineGraphLogicType>([
// convergence.

return results.map((cohortRetention, datasetIndex) => {
let retentionPercentages = cohortRetention.values
const retentionPercentages = cohortRetention.values
.map((value) => value.count / cohortRetention.values[0].count)
.map((value) => (isNaN(value) ? 0 : 100 * value))

if (cumulative) {
retentionPercentages = retentionPercentages.map((value, valueIndex, arr) => {
let cumulativeValue = value
for (let i = valueIndex + 1; i < arr.length; i++) {
cumulativeValue += arr[i]
}
return Math.min(cumulativeValue, 100)
})
}

// To calculate relative percentages, we take for instance Cohort 1 as percentages
// of the cohort size and create another series that has a 100 at prepended so we have
//
Expand Down
17 changes: 4 additions & 13 deletions frontend/src/scenes/retention/retentionTableLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ export const retentionTableLogic = kea<retentionTableLogicType>([
tableRows: [
(s) => [s.results, s.maxIntervalsCount, s.retentionFilter, s.breakdownFilter, s.hideSizeColumn],
(results, maxIntervalsCount, retentionFilter, breakdownFilter, hideSizeColumn) => {
const { period, cumulative } = retentionFilter || {}
const { period } = retentionFilter || {}
const { breakdowns } = breakdownFilter || {}

return range(maxIntervalsCount).map((index: number) => {
Expand Down Expand Up @@ -99,21 +99,12 @@ export const retentionTableLogic = kea<retentionTableLogicType>([

const secondColumn = hideSizeColumn ? [] : [currentResult.values[0].count]

const otherColumns = currentResult.values.map((value, valueIndex) => {
const otherColumns = currentResult.values.map((value) => {
const totalCount = currentResult.values[0]['count']
let count = value['count']

if (cumulative && valueIndex > 0) {
for (let i = valueIndex + 1; i < currentResult.values.length; i++) {
count += currentResult.values[i]['count']
}
count = Math.min(count, totalCount)
}

const percentage = totalCount > 0 ? (count / totalCount) * 100 : 0
const percentage = totalCount > 0 ? (value['count'] / totalCount) * 100 : 0

return {
count,
count: value['count'],
percentage,
}
})
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2287,10 +2287,10 @@ export interface RetentionFilterType extends FilterType {
returning_entity?: RetentionEntity
target_entity?: RetentionEntity
period?: RetentionPeriod
cumulative?: boolean

//frontend only
show_mean?: boolean
cumulative?: boolean
}
export interface LifecycleFilterType extends FilterType {
/** @deprecated */
Expand Down
3 changes: 0 additions & 3 deletions mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -390,9 +390,6 @@ posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Module "d
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Statement is unreachable [unreachable]
posthog/hogql_queries/insights/stickiness_query_runner.py:0: error: Module "django.utils.timezone" does not explicitly export attribute "datetime" [attr-defined]
posthog/hogql_queries/insights/retention_query_runner.py:0: error: Item "None" of "JoinExpr | None" has no attribute "sample" [union-attr]
posthog/hogql_queries/insights/retention_query_runner.py:0: error: Argument 2 to "parse_select" has incompatible type "dict[str, SelectQuery]"; expected "dict[str, Expr] | None" [arg-type]
posthog/hogql_queries/insights/retention_query_runner.py:0: note: "Dict" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance
posthog/hogql_queries/insights/retention_query_runner.py:0: note: Consider using "Mapping" instead, which is covariant in the value type
posthog/hogql_queries/insights/retention_query_runner.py:0: error: Unsupported operand types for - ("int" and "None") [operator]
posthog/hogql_queries/insights/retention_query_runner.py:0: note: Right operand is of type "int | None"
posthog/hogql_queries/insights/retention_query_runner.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "select" [union-attr]
Expand Down
32 changes: 23 additions & 9 deletions posthog/hogql_queries/insights/retention_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def events_where_clause(self, event_query_type: RetentionQueryType):

return events_where

def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.SelectQuery:
def actor_query(self, breakdown_values_filter: Optional[int] = None, cumulative: bool = False) -> ast.SelectQuery:
start_of_interval_sql = self.query_date_range.get_start_of_interval_hogql(
source=ast.Field(chain=["events", "timestamp"])
)
Expand Down Expand Up @@ -203,6 +203,10 @@ def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.Sele
),
)

intervals_from_base_array_aggregator = "arrayJoin"
if cumulative:
intervals_from_base_array_aggregator = "arrayMax"

inner_query = ast.SelectQuery(
select=[
ast.Alias(alias="actor_id", expr=ast.Field(chain=["events", target_field])),
Expand Down Expand Up @@ -269,11 +273,11 @@ def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.Sele
ast.Alias(
alias="intervals_from_base",
expr=parse_expr(
"""
arrayJoin(
f"""
{intervals_from_base_array_aggregator}(
arrayConcat(
if(
{is_first_interval_from_base},
{{is_first_interval_from_base}},
[0],
[]
),
Expand Down Expand Up @@ -313,11 +317,21 @@ def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.Sele
return inner_query

def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
placeholders = {
"actor_query": self.actor_query(),
}

with self.timings.measure("retention_query"):
if self.query.retentionFilter.cumulative:
actor_query = parse_select(
"""
SELECT
actor_id,
arrayJoin(range(0, intervals_from_base + 1)) as intervals_from_base,
breakdown_values
FROM {actor_query}
""",
{"actor_query": self.actor_query(cumulative=True)},
)
else:
actor_query = self.actor_query()

retention_query = parse_select(
"""
SELECT [actor_activity.breakdown_values] AS breakdown_values,
Expand All @@ -334,7 +348,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
LIMIT 10000
""",
placeholders,
{"actor_query": actor_query},
timings=self.timings,
)
return retention_query
Expand Down
39 changes: 39 additions & 0 deletions posthog/hogql_queries/insights/test/test_retention_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,45 @@ def test_interval_rounding(self):
],
)

def test_rolling_retention(self):
_create_person(team_id=self.team.pk, distinct_ids=["person1"])
_create_person(team_id=self.team.pk, distinct_ids=["person2"])
_create_person(team_id=self.team.pk, distinct_ids=["person3"])
_create_person(team_id=self.team.pk, distinct_ids=["person4"])

_create_events(
self.team,
[
("person1", _date(0)),
("person1", _date(3)),
("person1", _date(4)),
("person2", _date(0)),
("person2", _date(1)),
("person3", _date(1)),
("person3", _date(2)),
("person3", _date(3)),
("person4", _date(3)),
("person4", _date(4)),
("person5", _date(4)),
],
)

result = self.run_query(
query={
"dateRange": {"date_to": _date(5, hour=6)},
"retentionFilter": {
"cumulative": True,
"totalIntervals": 5,
"targetEntity": {"id": None, "name": "All events"},
"returningEntity": {"id": None, "name": "All events"},
},
}
)
self.assertEqual(
pluck(result, "values", "count"),
[[2, 1, 1, 0, 0], [1, 1, 0, 0], [3, 2, 0], [2, 0], [0]],
)

def test_all_events(self):
_create_person(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
_create_person(team_id=self.team.pk, distinct_ids=["person2"])
Expand Down
1 change: 0 additions & 1 deletion posthog/schema_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def serialize_query(self, next_serializer):
"toggledLifecycles",
"showLabelsOnSeries",
"showMean",
"cumulative",
"yAxisScaleType",
]
}
Expand Down

0 comments on commit 0a71651

Please sign in to comment.