From 62ed241942bac6309bcdc061cf685cd83dcd8c0e Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Wed, 11 Sep 2024 14:45:54 -0700 Subject: [PATCH 1/4] rolling in progress --- .../src/scenes/retention/RetentionModal.tsx | 42 +++++++------------ .../src/scenes/retention/RetentionTable.tsx | 16 +++---- .../retention/retentionLineGraphLogic.ts | 14 +------ .../scenes/retention/retentionTableLogic.ts | 17 ++------ .../insights/retention_query_runner.py | 32 ++++++++++---- 5 files changed, 52 insertions(+), 69 deletions(-) diff --git a/frontend/src/scenes/retention/RetentionModal.tsx b/frontend/src/scenes/retention/RetentionModal.tsx index 4dbb46e896a76..f80ea150b52b4 100644 --- a/frontend/src/scenes/retention/RetentionModal.tsx +++ b/frontend/src/scenes/retention/RetentionModal.tsx @@ -27,7 +27,7 @@ export function RetentionModal(): JSX.Element | null { const { results } = useValues(retentionLogic(insightProps)) const { people, peopleLoading, peopleLoadingMore } = useValues(retentionPeopleLogic(insightProps)) const { loadMorePeople } = useActions(retentionPeopleLogic(insightProps)) - const { aggregationTargetLabel, selectedInterval, exploreUrl, actorsQuery, retentionFilter } = useValues( + const { aggregationTargetLabel, selectedInterval, exploreUrl, actorsQuery } = useValues( retentionModalLogic(insightProps) ) const { closeModal } = useActions(retentionModalLogic(insightProps)) @@ -111,32 +111,20 @@ export function RetentionModal(): JSX.Element | null { {capitalizeFirstLetter(aggregationTargetLabel.singular)} - {row.values?.map((data: any, index: number) => { - let cumulativeCount = data.count - if (retentionFilter?.cumulative) { - for (let i = index + 1; i < row.values.length; i++) { - cumulativeCount += row.values[i].count - } - cumulativeCount = Math.min(cumulativeCount, row.values[0].count) - } - const percentageValue = - row.values[0].count > 0 ? cumulativeCount / row.values[0].count : 0 - - return ( - -
{results[index].label}
-
- {cumulativeCount} -   - {cumulativeCount > 0 && ( - - ({percentage(percentageValue)}) - - )} -
- - ) - })} + {row.values?.map((data: any, index: number) => ( + +
{results[index].label}
+
+ {data.count} +   + {data.count > 0 && ( + + ({percentage(data.count / row?.values[0]['count'])}) + + )} +
+ + ))} {people.result && people.result.map((personAppearances: RetentionTableAppearanceType) => ( diff --git a/frontend/src/scenes/retention/RetentionTable.tsx b/frontend/src/scenes/retention/RetentionTable.tsx index 7fb6d4a0c968f..76cf7c17e29dc 100644 --- a/frontend/src/scenes/retention/RetentionTable.tsx +++ b/frontend/src/scenes/retention/RetentionTable.tsx @@ -35,7 +35,7 @@ export function RetentionTable({ inCardView = false }: { inCardView?: boolean }) {showMean && tableRows.length > 0 ? ( - {range(0, tableRows[0].length).map((columnIndex) => ( + {range(0, tableRows[0].length - 1).map((columnIndex) => ( {columnIndex <= (hideSizeColumn ? 0 : 1) ? ( columnIndex == 0 ? ( @@ -47,23 +47,23 @@ export function RetentionTable({ inCardView = false }: { inCardView?: boolean }) mean( tableRows.map((row) => { // Stop before the last item in a row, which is an incomplete time period - if ( - (columnIndex >= row.length - 1 && isLatestPeriod) || - !row[columnIndex] - ) { - return null + if (columnIndex < row.length - 1) { + return row[columnIndex].percentage } - return row[columnIndex].percentage + return null }) ) || 0 } - latest={isLatestPeriod && columnIndex == tableRows[0].length - 1} + latest={columnIndex == tableRows[0].length - 1} clickable={false} backgroundColor={PURPLE} /> )} ))} + + + ) : undefined} diff --git a/frontend/src/scenes/retention/retentionLineGraphLogic.ts b/frontend/src/scenes/retention/retentionLineGraphLogic.ts index 7815a68525b83..0ab2401f7c38e 100644 --- a/frontend/src/scenes/retention/retentionLineGraphLogic.ts +++ b/frontend/src/scenes/retention/retentionLineGraphLogic.ts @@ -29,7 +29,7 @@ export const retentionLineGraphLogic = kea([ trendSeries: [ (s) => [s.results, s.retentionFilter], (results, retentionFilter): RetentionTrendPayload[] => { - const { period, retentionReference, cumulative } = retentionFilter || {} + const { period, retentionReference } = retentionFilter || {} // If the retention reference option is specified as previous, // then translate retention rates to relative to previous, // otherwise, just use what the result was originally. @@ -47,20 +47,10 @@ export const retentionLineGraphLogic = kea([ // convergence. return results.map((cohortRetention, datasetIndex) => { - let retentionPercentages = cohortRetention.values + const retentionPercentages = cohortRetention.values .map((value) => value.count / cohortRetention.values[0].count) .map((value) => (isNaN(value) ? 0 : 100 * value)) - if (cumulative) { - retentionPercentages = retentionPercentages.map((value, valueIndex, arr) => { - let cumulativeValue = value - for (let i = valueIndex + 1; i < arr.length; i++) { - cumulativeValue += arr[i] - } - return Math.min(cumulativeValue, 100) - }) - } - // To calculate relative percentages, we take for instance Cohort 1 as percentages // of the cohort size and create another series that has a 100 at prepended so we have // diff --git a/frontend/src/scenes/retention/retentionTableLogic.ts b/frontend/src/scenes/retention/retentionTableLogic.ts index 6ba3d44a15e50..0969d1ffa6f53 100644 --- a/frontend/src/scenes/retention/retentionTableLogic.ts +++ b/frontend/src/scenes/retention/retentionTableLogic.ts @@ -69,7 +69,7 @@ export const retentionTableLogic = kea([ tableRows: [ (s) => [s.results, s.maxIntervalsCount, s.retentionFilter, s.breakdownFilter, s.hideSizeColumn], (results, maxIntervalsCount, retentionFilter, breakdownFilter, hideSizeColumn) => { - const { period, cumulative } = retentionFilter || {} + const { period } = retentionFilter || {} const { breakdowns } = breakdownFilter || {} return range(maxIntervalsCount).map((index: number) => { @@ -99,21 +99,12 @@ export const retentionTableLogic = kea([ const secondColumn = hideSizeColumn ? [] : [currentResult.values[0].count] - const otherColumns = currentResult.values.map((value, valueIndex) => { + const otherColumns = currentResult.values.map((value) => { const totalCount = currentResult.values[0]['count'] - let count = value['count'] - - if (cumulative && valueIndex > 0) { - for (let i = valueIndex + 1; i < currentResult.values.length; i++) { - count += currentResult.values[i]['count'] - } - count = Math.min(count, totalCount) - } - - const percentage = totalCount > 0 ? (count / totalCount) * 100 : 0 + const percentage = totalCount > 0 ? (value['count'] / totalCount) * 100 : 0 return { - count, + count: value['count'], percentage, } }) diff --git a/posthog/hogql_queries/insights/retention_query_runner.py b/posthog/hogql_queries/insights/retention_query_runner.py index 9bbb21e109a19..27db6819148e1 100644 --- a/posthog/hogql_queries/insights/retention_query_runner.py +++ b/posthog/hogql_queries/insights/retention_query_runner.py @@ -124,7 +124,7 @@ def events_where_clause(self, event_query_type: RetentionQueryType): return events_where - def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.SelectQuery: + def actor_query(self, breakdown_values_filter: Optional[int] = None, cumulative: bool = False) -> ast.SelectQuery: start_of_interval_sql = self.query_date_range.get_start_of_interval_hogql( source=ast.Field(chain=["events", "timestamp"]) ) @@ -203,6 +203,10 @@ def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.Sele ), ) + intervals_from_base_array_aggregator = "arrayJoin" + if cumulative: + intervals_from_base_array_aggregator = "arrayMax" + inner_query = ast.SelectQuery( select=[ ast.Alias(alias="actor_id", expr=ast.Field(chain=["events", target_field])), @@ -269,11 +273,11 @@ def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.Sele ast.Alias( alias="intervals_from_base", expr=parse_expr( - """ - arrayJoin( + f""" + {intervals_from_base_array_aggregator}( arrayConcat( if( - {is_first_interval_from_base}, + {{is_first_interval_from_base}}, [0], [] ), @@ -313,11 +317,21 @@ def actor_query(self, breakdown_values_filter: Optional[int] = None) -> ast.Sele return inner_query def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: - placeholders = { - "actor_query": self.actor_query(), - } - with self.timings.measure("retention_query"): + if self.query.retentionFilter.cumulative: + actor_query = parse_select( + """ + SELECT + actor_id, + arrayJoin(range(0, intervals_from_base + 1)) as intervals_from_base, + breakdown_values + FROM {actor_query} + """, + {"actor_query": self.actor_query(cumulative=True)}, + ) + else: + actor_query = self.actor_query() + retention_query = parse_select( """ SELECT [actor_activity.breakdown_values] AS breakdown_values, @@ -334,7 +348,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: LIMIT 10000 """, - placeholders, + {"actor_query": actor_query}, timings=self.timings, ) return retention_query From 9ec0094ea4bcaf30c572f32782e2bb6903556d33 Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Wed, 11 Sep 2024 15:13:51 -0700 Subject: [PATCH 2/4] retention --- frontend/src/types.ts | 2 +- .../test/test_retention_query_runner.py | 39 +++++++++++++++++++ posthog/schema_helpers.py | 1 - 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/frontend/src/types.ts b/frontend/src/types.ts index c981ee4428725..f1d6dde50d189 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -2287,10 +2287,10 @@ export interface RetentionFilterType extends FilterType { returning_entity?: RetentionEntity target_entity?: RetentionEntity period?: RetentionPeriod + cumulative?: boolean //frontend only show_mean?: boolean - cumulative?: boolean } export interface LifecycleFilterType extends FilterType { /** @deprecated */ diff --git a/posthog/hogql_queries/insights/test/test_retention_query_runner.py b/posthog/hogql_queries/insights/test/test_retention_query_runner.py index bc97e8b7d51d3..bd42a0513b4dc 100644 --- a/posthog/hogql_queries/insights/test/test_retention_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_retention_query_runner.py @@ -826,6 +826,45 @@ def test_interval_rounding(self): ], ) + def test_rolling_retention(self): + _create_person(team_id=self.team.pk, distinct_ids=["person1"]) + _create_person(team_id=self.team.pk, distinct_ids=["person2"]) + _create_person(team_id=self.team.pk, distinct_ids=["person3"]) + _create_person(team_id=self.team.pk, distinct_ids=["person4"]) + + _create_events( + self.team, + [ + ("person1", _date(0)), + ("person1", _date(3)), + ("person1", _date(4)), + ("person2", _date(0)), + ("person2", _date(1)), + ("person3", _date(1)), + ("person3", _date(2)), + ("person3", _date(3)), + ("person4", _date(3)), + ("person4", _date(4)), + ("person5", _date(4)), + ], + ) + + result = self.run_query( + query={ + "dateRange": {"date_to": _date(5, hour=6)}, + "retentionFilter": { + "cumulative": True, + "totalIntervals": 5, + "targetEntity": {"id": None, "name": "All events"}, + "returningEntity": {"id": None, "name": "All events"}, + }, + } + ) + self.assertEqual( + pluck(result, "values", "count"), + [[2, 1, 1, 0, 0], [1, 1, 0, 0], [3, 2, 0], [2, 0], [0]], + ) + def test_all_events(self): _create_person(team_id=self.team.pk, distinct_ids=["person1", "alias1"]) _create_person(team_id=self.team.pk, distinct_ids=["person2"]) diff --git a/posthog/schema_helpers.py b/posthog/schema_helpers.py index 126c68ca29a6c..356e49c98a063 100644 --- a/posthog/schema_helpers.py +++ b/posthog/schema_helpers.py @@ -70,7 +70,6 @@ def serialize_query(self, next_serializer): "toggledLifecycles", "showLabelsOnSeries", "showMean", - "cumulative", "yAxisScaleType", ] } From 32068a62aeb8e82e63f104954d8138fff6a2c664 Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Wed, 11 Sep 2024 15:36:56 -0700 Subject: [PATCH 3/4] baseline sync --- mypy-baseline.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 27ce3e7bd8286..e82f5b54023e2 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -390,9 +390,6 @@ posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Module "d posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Statement is unreachable [unreachable] posthog/hogql_queries/insights/stickiness_query_runner.py:0: error: Module "django.utils.timezone" does not explicitly export attribute "datetime" [attr-defined] posthog/hogql_queries/insights/retention_query_runner.py:0: error: Item "None" of "JoinExpr | None" has no attribute "sample" [union-attr] -posthog/hogql_queries/insights/retention_query_runner.py:0: error: Argument 2 to "parse_select" has incompatible type "dict[str, SelectQuery]"; expected "dict[str, Expr] | None" [arg-type] -posthog/hogql_queries/insights/retention_query_runner.py:0: note: "Dict" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance -posthog/hogql_queries/insights/retention_query_runner.py:0: note: Consider using "Mapping" instead, which is covariant in the value type posthog/hogql_queries/insights/retention_query_runner.py:0: error: Unsupported operand types for - ("int" and "None") [operator] posthog/hogql_queries/insights/retention_query_runner.py:0: note: Right operand is of type "int | None" posthog/hogql_queries/insights/retention_query_runner.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "select" [union-attr] From edab500ea3ab4ba22512166bf4addfcecf769696 Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Thu, 12 Sep 2024 07:38:28 -0700 Subject: [PATCH 4/4] fix --- frontend/src/scenes/retention/RetentionTable.tsx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/frontend/src/scenes/retention/RetentionTable.tsx b/frontend/src/scenes/retention/RetentionTable.tsx index 76cf7c17e29dc..7fb6d4a0c968f 100644 --- a/frontend/src/scenes/retention/RetentionTable.tsx +++ b/frontend/src/scenes/retention/RetentionTable.tsx @@ -35,7 +35,7 @@ export function RetentionTable({ inCardView = false }: { inCardView?: boolean }) {showMean && tableRows.length > 0 ? ( - {range(0, tableRows[0].length - 1).map((columnIndex) => ( + {range(0, tableRows[0].length).map((columnIndex) => ( {columnIndex <= (hideSizeColumn ? 0 : 1) ? ( columnIndex == 0 ? ( @@ -47,23 +47,23 @@ export function RetentionTable({ inCardView = false }: { inCardView?: boolean }) mean( tableRows.map((row) => { // Stop before the last item in a row, which is an incomplete time period - if (columnIndex < row.length - 1) { - return row[columnIndex].percentage + if ( + (columnIndex >= row.length - 1 && isLatestPeriod) || + !row[columnIndex] + ) { + return null } - return null + return row[columnIndex].percentage }) ) || 0 } - latest={columnIndex == tableRows[0].length - 1} + latest={isLatestPeriod && columnIndex == tableRows[0].length - 1} clickable={false} backgroundColor={PURPLE} /> )} ))} - - - ) : undefined}