Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: rolling retention #24926

Merged
merged 5 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 15 additions & 27 deletions frontend/src/scenes/retention/RetentionModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ export function RetentionModal(): JSX.Element | null {
const { results } = useValues(retentionLogic(insightProps))
const { people, peopleLoading, peopleLoadingMore } = useValues(retentionPeopleLogic(insightProps))
const { loadMorePeople } = useActions(retentionPeopleLogic(insightProps))
const { aggregationTargetLabel, selectedInterval, exploreUrl, actorsQuery, retentionFilter } = useValues(
const { aggregationTargetLabel, selectedInterval, exploreUrl, actorsQuery } = useValues(
retentionModalLogic(insightProps)
)
const { closeModal } = useActions(retentionModalLogic(insightProps))
Expand Down Expand Up @@ -111,32 +111,20 @@ export function RetentionModal(): JSX.Element | null {
<tbody>
<tr>
<th>{capitalizeFirstLetter(aggregationTargetLabel.singular)}</th>
{row.values?.map((data: any, index: number) => {
let cumulativeCount = data.count
if (retentionFilter?.cumulative) {
for (let i = index + 1; i < row.values.length; i++) {
cumulativeCount += row.values[i].count
}
cumulativeCount = Math.min(cumulativeCount, row.values[0].count)
}
const percentageValue =
row.values[0].count > 0 ? cumulativeCount / row.values[0].count : 0

return (
<th key={index}>
<div>{results[index].label}</div>
<div>
{cumulativeCount}
&nbsp;
{cumulativeCount > 0 && (
<span className="text-muted">
({percentage(percentageValue)})
</span>
)}
</div>
</th>
)
})}
{row.values?.map((data: any, index: number) => (
<th key={index}>
<div>{results[index].label}</div>
<div>
{data.count}
&nbsp;
{data.count > 0 && (
<span className="text-muted">
({percentage(data.count / row?.values[0]['count'])})
</span>
)}
</div>
</th>
))}
</tr>
{people.result &&
people.result.map((personAppearances: RetentionTableAppearanceType) => (
Expand Down
14 changes: 2 additions & 12 deletions frontend/src/scenes/retention/retentionLineGraphLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export const retentionLineGraphLogic = kea<retentionLineGraphLogicType>([
trendSeries: [
(s) => [s.results, s.retentionFilter],
(results, retentionFilter): RetentionTrendPayload[] => {
const { period, retentionReference, cumulative } = retentionFilter || {}
const { period, retentionReference } = retentionFilter || {}
// If the retention reference option is specified as previous,
// then translate retention rates to relative to previous,
// otherwise, just use what the result was originally.
Expand All @@ -47,20 +47,10 @@ export const retentionLineGraphLogic = kea<retentionLineGraphLogicType>([
// convergence.

return results.map((cohortRetention, datasetIndex) => {
let retentionPercentages = cohortRetention.values
const retentionPercentages = cohortRetention.values
.map((value) => value.count / cohortRetention.values[0].count)
.map((value) => (isNaN(value) ? 0 : 100 * value))

if (cumulative) {
retentionPercentages = retentionPercentages.map((value, valueIndex, arr) => {
let cumulativeValue = value
for (let i = valueIndex + 1; i < arr.length; i++) {
cumulativeValue += arr[i]
}
return Math.min(cumulativeValue, 100)
})
}

// To calculate relative percentages, we take for instance Cohort 1 as percentages
// of the cohort size and create another series that has a 100 at prepended so we have
//
Expand Down
17 changes: 4 additions & 13 deletions frontend/src/scenes/retention/retentionTableLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ export const retentionTableLogic = kea<retentionTableLogicType>([
tableRows: [
(s) => [s.results, s.maxIntervalsCount, s.retentionFilter, s.breakdownFilter, s.hideSizeColumn],
(results, maxIntervalsCount, retentionFilter, breakdownFilter, hideSizeColumn) => {
const { period, cumulative } = retentionFilter || {}
const { period } = retentionFilter || {}
const { breakdowns } = breakdownFilter || {}

return range(maxIntervalsCount).map((index: number) => {
Expand Down Expand Up @@ -99,21 +99,12 @@ export const retentionTableLogic = kea<retentionTableLogicType>([

const secondColumn = hideSizeColumn ? [] : [currentResult.values[0].count]

const otherColumns = currentResult.values.map((value, valueIndex) => {
const otherColumns = currentResult.values.map((value) => {
const totalCount = currentResult.values[0]['count']
let count = value['count']

if (cumulative && valueIndex > 0) {
for (let i = valueIndex + 1; i < currentResult.values.length; i++) {
count += currentResult.values[i]['count']
}
count = Math.min(count, totalCount)
}

const percentage = totalCount > 0 ? (count / totalCount) * 100 : 0
const percentage = totalCount > 0 ? (value['count'] / totalCount) * 100 : 0

return {
count,
count: value['count'],
percentage,
}
})
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2287,10 +2287,10 @@ export interface RetentionFilterType extends FilterType {
returning_entity?: RetentionEntity
target_entity?: RetentionEntity
period?: RetentionPeriod
cumulative?: boolean

//frontend only
show_mean?: boolean
cumulative?: boolean
}
export interface LifecycleFilterType extends FilterType {
/** @deprecated */
Expand Down
3 changes: 0 additions & 3 deletions mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -390,9 +390,6 @@ posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Module "d
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Statement is unreachable [unreachable]
posthog/hogql_queries/insights/stickiness_query_runner.py:0: error: Module "django.utils.timezone" does not explicitly export attribute "datetime" [attr-defined]
posthog/hogql_queries/insights/retention_query_runner.py:0: error: Item "None" of "JoinExpr | None" has no attribute "sample" [union-attr]
posthog/hogql_queries/insights/retention_query_runner.py:0: error: Argument 2 to "parse_select" has incompatible type "dict[str, SelectQuery]"; expected "dict[str, Expr] | None" [arg-type]
posthog/hogql_queries/insights/retention_query_runner.py:0: note: "Dict" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance
posthog/hogql_queries/insights/retention_query_runner.py:0: note: Consider using "Mapping" instead, which is covariant in the value type
posthog/hogql_queries/insights/retention_query_runner.py:0: error: Unsupported operand types for - ("int" and "None") [operator]
posthog/hogql_queries/insights/retention_query_runner.py:0: note: Right operand is of type "int | None"
posthog/hogql_queries/insights/retention_query_runner.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "select" [union-attr]
Expand Down
32 changes: 23 additions & 9 deletions posthog/hogql_queries/insights/retention_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def events_where_clause(self, event_query_type: RetentionQueryType):

return events_where

def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.SelectQuery:
def actor_query(self, breakdown_values_filter: Optional[int] = None, cumulative: bool = False) -> ast.SelectQuery:
start_of_interval_sql = self.query_date_range.get_start_of_interval_hogql(
source=ast.Field(chain=["events", "timestamp"])
)
Expand Down Expand Up @@ -203,6 +203,10 @@ def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.Sele
),
)

intervals_from_base_array_aggregator = "arrayJoin"
if cumulative:
intervals_from_base_array_aggregator = "arrayMax"

inner_query = ast.SelectQuery(
select=[
ast.Alias(alias="actor_id", expr=ast.Field(chain=["events", target_field])),
Expand Down Expand Up @@ -269,11 +273,11 @@ def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.Sele
ast.Alias(
alias="intervals_from_base",
expr=parse_expr(
"""
arrayJoin(
f"""
{intervals_from_base_array_aggregator}(
arrayConcat(
if(
{is_first_interval_from_base},
{{is_first_interval_from_base}},
[0],
[]
),
Expand Down Expand Up @@ -313,11 +317,21 @@ def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.Sele
return inner_query

def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
placeholders = {
"actor_query": self.actor_query(),
}

with self.timings.measure("retention_query"):
if self.query.retentionFilter.cumulative:
actor_query = parse_select(
"""
SELECT
actor_id,
arrayJoin(range(0, intervals_from_base + 1)) as intervals_from_base,
breakdown_values
FROM {actor_query}
""",
{"actor_query": self.actor_query(cumulative=True)},
)
else:
actor_query = self.actor_query()

retention_query = parse_select(
"""
SELECT [actor_activity.breakdown_values] AS breakdown_values,
Expand All @@ -334,7 +348,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:

LIMIT 10000
""",
placeholders,
{"actor_query": actor_query},
timings=self.timings,
)
return retention_query
Expand Down
39 changes: 39 additions & 0 deletions posthog/hogql_queries/insights/test/test_retention_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,45 @@ def test_interval_rounding(self):
],
)

def test_rolling_retention(self):
_create_person(team_id=self.team.pk, distinct_ids=["person1"])
_create_person(team_id=self.team.pk, distinct_ids=["person2"])
_create_person(team_id=self.team.pk, distinct_ids=["person3"])
_create_person(team_id=self.team.pk, distinct_ids=["person4"])

_create_events(
self.team,
[
("person1", _date(0)),
("person1", _date(3)),
("person1", _date(4)),
("person2", _date(0)),
("person2", _date(1)),
("person3", _date(1)),
("person3", _date(2)),
("person3", _date(3)),
("person4", _date(3)),
("person4", _date(4)),
("person5", _date(4)),
],
)

result = self.run_query(
query={
"dateRange": {"date_to": _date(5, hour=6)},
"retentionFilter": {
"cumulative": True,
"totalIntervals": 5,
"targetEntity": {"id": None, "name": "All events"},
"returningEntity": {"id": None, "name": "All events"},
},
}
)
self.assertEqual(
pluck(result, "values", "count"),
[[2, 1, 1, 0, 0], [1, 1, 0, 0], [3, 2, 0], [2, 0], [0]],
)

def test_all_events(self):
_create_person(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
_create_person(team_id=self.team.pk, distinct_ids=["person2"])
Expand Down
1 change: 0 additions & 1 deletion posthog/schema_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def serialize_query(self, next_serializer):
"toggledLifecycles",
"showLabelsOnSeries",
"showMean",
"cumulative",
"yAxisScaleType",
]
}
Expand Down
Loading