Skip to content

Commit

Permalink
chore(insights): Yeet groups-on-events from tests + optimizer (#23320)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
Twixes and github-actions[bot] authored Jul 11, 2024
1 parent 9a41312 commit 26f0438
Show file tree
Hide file tree
Showing 21 changed files with 29 additions and 208 deletions.
22 changes: 0 additions & 22 deletions ee/clickhouse/materialized_columns/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,6 @@ def event_properties(self, team_id: str) -> set[str]:
def person_on_events_properties(self, team_id: str) -> set[str]:
return self._get_properties(GET_EVENT_PROPERTIES_COUNT.format(column_name="person_properties"), team_id)

@instance_memoize
def group_on_events_properties(self, group_type_index: int, team_id: str) -> set[str]:
return self._get_properties(
GET_EVENT_PROPERTIES_COUNT.format(column_name=f"group{group_type_index}_properties"),
team_id,
)

def _get_properties(self, query, team_id) -> set[str]:
rows = sync_execute(query, {"team_id": team_id})
return {name for name, _ in rows}
Expand Down Expand Up @@ -99,11 +92,6 @@ def properties(
person_props = team_manager.person_properties(self.team_id)
event_props = team_manager.event_properties(self.team_id)
person_on_events_props = team_manager.person_on_events_properties(self.team_id)
group0_props = team_manager.group_on_events_properties(0, self.team_id)
group1_props = team_manager.group_on_events_properties(1, self.team_id)
group2_props = team_manager.group_on_events_properties(2, self.team_id)
group3_props = team_manager.group_on_events_properties(3, self.team_id)
group4_props = team_manager.group_on_events_properties(4, self.team_id)

for table_column, property in self._all_properties:
if property in event_props:
Expand All @@ -113,16 +101,6 @@ def properties(

if property in person_on_events_props and "person_properties" in table_column:
yield "events", "person_properties", property
if property in group0_props and "group0_properties" in table_column:
yield "events", "group0_properties", property
if property in group1_props and "group1_properties" in table_column:
yield "events", "group1_properties", property
if property in group2_props and "group2_properties" in table_column:
yield "events", "group2_properties", property
if property in group3_props and "group3_properties" in table_column:
yield "events", "group3_properties", property
if property in group4_props and "group4_properties" in table_column:
yield "events", "group4_properties", property


def _analyze(since_hours_ago: int, min_query_time: int, team_id: Optional[int] = None) -> list[Suggestion]:
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/materialized_columns/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def _materialized_column_name(
) -> str:
"Returns a sanitized and unique column name to use for materialized column"

prefix = "mat_" if table == "events" or table == "groups" else "pmat_"
prefix = "pmat_" if table == "person" else "mat_"

if table_column != DEFAULT_TABLE_COLUMN:
prefix += f"{SHORT_TABLE_COLUMN_NAME[table_column]}_"
Expand Down
19 changes: 0 additions & 19 deletions ee/clickhouse/queries/column_optimizer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from collections import Counter as TCounter
from typing import cast

from posthog.clickhouse.materialized_columns.column import ColumnName
from posthog.constants import TREND_FILTER_TYPE_ACTIONS, FunnelCorrelationType
from posthog.models.action.util import get_action_tables_and_properties
from posthog.models.filters.mixins.utils import cached_property
Expand All @@ -24,24 +23,6 @@ def group_types_to_query(self) -> set[GroupTypeIndex]:
used_properties = self.used_properties_with_type("group")
return {cast(GroupTypeIndex, group_type_index) for _, _, group_type_index in used_properties}

@cached_property
def group_on_event_columns_to_query(self) -> set[ColumnName]:
"Returns a list of event table group columns containing materialized properties that this query needs"
used_properties = self.used_properties_with_type("group")

columns_to_query: set[ColumnName] = set()

for group_type_index in range(5):
columns_to_query = columns_to_query.union(
self.columns_to_query(
"events",
{property for property in used_properties if property[2] == group_type_index},
f"group{group_type_index}_properties",
)
)

return columns_to_query

@cached_property
def properties_used_in_filter(self) -> TCounter[PropertyIdentifier]:
"Returns collection of properties + types that this query would use"
Expand Down
5 changes: 0 additions & 5 deletions ee/clickhouse/queries/funnels/test/breakdown_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
)
from posthog.test.base import (
APIBaseTest,
also_test_with_materialized_columns,
snapshot_clickhouse_queries,
also_test_with_person_on_events_v2,
)
Expand Down Expand Up @@ -307,10 +306,6 @@ def test_funnel_aggregate_by_groups_breakdown_group(self):
],
)

@also_test_with_materialized_columns(
group_properties=[(0, "industry")],
materialize_only_with_person_on_events=True,
)
@also_test_with_person_on_events_v2
@snapshot_clickhouse_queries
def test_funnel_aggregate_by_groups_breakdown_group_person_on_events(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,6 @@ def test_funnel_correlation_with_properties_and_groups(self):
@also_test_with_materialized_columns(
event_properties=[],
person_properties=["$browser"],
group_properties=[(0, "industry")],
verify_no_jsonextract=False,
)
@also_test_with_person_on_events_v2
Expand Down
36 changes: 1 addition & 35 deletions ee/clickhouse/queries/test/test_column_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,20 +229,6 @@ def test_materialized_columns_checks_person_on_events(self):
BASE_FILTER.shallow_clone(
{
"properties": [
{
"key": "group_prop",
"value": ["value"],
"operator": "exact",
"type": "group",
"group_type_index": 2,
},
{
"key": "group_prop",
"value": ["value"],
"operator": "exact",
"type": "group",
"group_type_index": 0,
},
{
"key": "person_prop",
"value": ["value"],
Expand All @@ -256,35 +242,15 @@ def test_materialized_columns_checks_person_on_events(self):
)

self.assertEqual(optimizer().person_on_event_columns_to_query, {"person_properties"})
self.assertEqual(
optimizer().group_on_event_columns_to_query,
{"group0_properties", "group2_properties"},
)

# materialising the props on `person` or `group` table should make no difference
# materialising the props on `person` table should make no difference
materialize("person", "person_prop")
materialize("groups", "group_prop", table_column="group_properties")

self.assertEqual(optimizer().person_on_event_columns_to_query, {"person_properties"})
self.assertEqual(
optimizer().group_on_event_columns_to_query,
{"group0_properties", "group2_properties"},
)

materialize("events", "person_prop", table_column="person_properties")
materialize("events", "group_prop", table_column="group0_properties")

self.assertEqual(optimizer().person_on_event_columns_to_query, {"mat_pp_person_prop"})
self.assertEqual(
optimizer().group_on_event_columns_to_query,
{"mat_gp0_group_prop", "group2_properties"},
)

materialize("events", "group_prop", table_column="group2_properties")
self.assertEqual(
optimizer().group_on_event_columns_to_query,
{"mat_gp0_group_prop", "mat_gp2_group_prop"},
)

def test_group_types_to_query(self):
group_types_to_query = lambda filter: EnterpriseColumnOptimizer(filter, self.team.id).group_types_to_query
Expand Down
5 changes: 0 additions & 5 deletions ee/clickhouse/queries/test/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -3740,11 +3740,6 @@ def test_groups_filtering(self):
],
)

@also_test_with_materialized_columns(
["$current_url", "$screen_name"],
group_properties=[(0, "industry"), (1, "industry")],
materialize_only_with_person_on_events=True,
)
@snapshot_clickhouse_queries
def test_groups_filtering_person_on_events(self):
self._create_groups()
Expand Down
6 changes: 0 additions & 6 deletions ee/clickhouse/queries/test/test_retention.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from posthog.test.base import (
APIBaseTest,
ClickhouseTestMixin,
also_test_with_materialized_columns,
create_person_id_override_by_distinct_id,
snapshot_clickhouse_queries,
)
Expand Down Expand Up @@ -233,9 +232,6 @@ def test_groups_in_period(self):
self.assertEqual(actor_result[1]["person"]["id"], "org:6")
self.assertEqual(actor_result[1]["appearances"], [1, 1, 0, 1, 1, 0, 1])

@also_test_with_materialized_columns(
group_properties=[(0, "industry")], materialize_only_with_person_on_events=True
)
@snapshot_clickhouse_queries
def test_groups_filtering_person_on_events(self):
self._create_groups_and_events()
Expand Down Expand Up @@ -450,7 +446,6 @@ def test_groups_filtering_person_on_events_v2(self):
],
)

@also_test_with_materialized_columns(group_properties=[(0, "industry")])
@snapshot_clickhouse_queries
def test_groups_aggregating_person_on_events(self):
self._create_groups_and_events()
Expand Down Expand Up @@ -508,7 +503,6 @@ def test_groups_aggregating_person_on_events(self):
],
)

@also_test_with_materialized_columns(group_properties=[(0, "industry")])
@snapshot_clickhouse_queries
def test_groups_in_period_person_on_events(self):
from posthog.models.team import util
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,10 @@
(SELECT id
FROM person
WHERE team_id = 2
AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) )
AND (NOT ("pmat_email" ILIKE '%posthog.com%')) )
GROUP BY id
HAVING max(is_deleted) = 0
AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
AND (NOT (argMax(person."pmat_email", version) ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
WHERE team_id = 2
AND e.event = 'target event'
AND toDateTime(e.timestamp) >= toDateTime('2020-01-01 00:00:00', 'UTC')
Expand Down Expand Up @@ -263,10 +263,10 @@
(SELECT id
FROM person
WHERE team_id = 2
AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) )
AND (NOT ("pmat_email" ILIKE '%posthog.com%')) )
GROUP BY id
HAVING max(is_deleted) = 0
AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
AND (NOT (argMax(person."pmat_email", version) ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
WHERE team_id = 2
AND e.event = 'target event'
GROUP BY target
Expand Down Expand Up @@ -327,10 +327,10 @@
(SELECT id
FROM person
WHERE team_id = 2
AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) )
AND (NOT ("pmat_email" ILIKE '%posthog.com%')) )
GROUP BY id
HAVING max(is_deleted) = 0
AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
AND (NOT (argMax(person."pmat_email", version) ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
WHERE team_id = 2
AND e.event = 'target event'
AND toDateTime(e.timestamp) >= toDateTime('2020-01-01 00:00:00', 'UTC')
Expand Down Expand Up @@ -363,10 +363,10 @@
(SELECT id
FROM person
WHERE team_id = 2
AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) )
AND (NOT ("pmat_email" ILIKE '%posthog.com%')) )
GROUP BY id
HAVING max(is_deleted) = 0
AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
AND (NOT (argMax(person."pmat_email", version) ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
WHERE team_id = 2
AND e.event = 'target event'
GROUP BY target
Expand Down Expand Up @@ -423,10 +423,10 @@
(SELECT id
FROM person
WHERE team_id = 2
AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) )
AND (NOT ("pmat_email" ILIKE '%posthog.com%')) )
GROUP BY id
HAVING max(is_deleted) = 0
AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
AND (NOT (argMax(person."pmat_email", version) ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
WHERE team_id = 2
AND e.event = 'target event'
AND toDateTime(e.timestamp) >= toDateTime('2020-01-01 00:00:00', 'UTC')
Expand Down Expand Up @@ -459,10 +459,10 @@
(SELECT id
FROM person
WHERE team_id = 2
AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) )
AND (NOT ("pmat_email" ILIKE '%posthog.com%')) )
GROUP BY id
HAVING max(is_deleted) = 0
AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
AND (NOT (argMax(person."pmat_email", version) ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id
WHERE team_id = 2
AND e.event = 'target event'
GROUP BY target
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def _create_sample_data(self, num, delete=False):
team=self.team,
timestamp="2021-05-05 00:00:00",
properties={"$browser": "Chrome", "$group_0": "g0"},
group0_properties={"name": "g0"},
)
if delete:
person.delete()
Expand Down
1 change: 0 additions & 1 deletion mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@ posthog/test/base.py:0: error: Incompatible types in assignment (expression has
posthog/test/base.py:0: error: Incompatible types in assignment (expression has type "None", variable has type "OrganizationMembership") [assignment]
posthog/test/base.py:0: error: "MemoryLeakTestMixin" has no attribute "assertLessEqual" [attr-defined]
posthog/test/base.py:0: error: "MemoryLeakTestMixin" has no attribute "assertLessEqual" [attr-defined]
posthog/test/base.py:0: error: Argument "table_column" to "materialize" has incompatible type "str"; expected "Literal['properties', 'group_properties', 'person_properties', 'group0_properties', 'group1_properties', 'group2_properties', 'group3_properties', 'group4_properties']" [arg-type]
posthog/test/base.py:0: error: Item "None" of "FrameType | None" has no attribute "f_back" [union-attr]
posthog/test/base.py:0: error: Item "None" of "FrameType | Any | None" has no attribute "f_locals" [union-attr]
posthog/test/base.py:0: error: Item "None" of "AppConfig | None" has no attribute "name" [union-attr]
Expand Down
16 changes: 12 additions & 4 deletions posthog/api/test/test_properties_timeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,9 @@ def test_timeline_for_existing_actor_with_three_events_and_return_to_previous_va
)

@snapshot_clickhouse_queries
@also_test_with_materialized_columns(person_properties=["bar"], materialize_only_with_person_on_events=True)
@also_test_with_materialized_columns(
person_properties=["bar"],
)
def test_timeline_for_existing_person_with_three_events_and_return_to_previous_value_at_single_day_point(self):
self._create_person(properties={"foo": "abc", "bar": 123})
self._create_event(
Expand Down Expand Up @@ -380,7 +382,9 @@ def test_timeline_for_existing_person_with_three_events_and_return_to_previous_v
)

@snapshot_clickhouse_queries
@also_test_with_materialized_columns(person_properties=["bar"], materialize_only_with_person_on_events=True)
@also_test_with_materialized_columns(
person_properties=["bar"],
)
def test_timeline_for_existing_person_with_three_events_and_return_to_previous_value_at_single_hour_point(self):
self._create_person(properties={"foo": "abc", "bar": 123})
self._create_event(
Expand Down Expand Up @@ -443,7 +447,9 @@ def test_timeline_for_existing_person_with_three_events_and_return_to_previous_v
)

@snapshot_clickhouse_queries
@also_test_with_materialized_columns(person_properties=["bar"], materialize_only_with_person_on_events=True)
@also_test_with_materialized_columns(
person_properties=["bar"],
)
def test_timeline_for_existing_person_with_three_events_and_return_to_previous_value_at_single_month_point(
self,
):
Expand Down Expand Up @@ -508,7 +514,9 @@ def test_timeline_for_existing_person_with_three_events_and_return_to_previous_v
)

@snapshot_clickhouse_queries
@also_test_with_materialized_columns(person_properties=["bar"], materialize_only_with_person_on_events=True)
@also_test_with_materialized_columns(
person_properties=["bar"],
)
def test_timeline_for_existing_person_with_three_events_and_return_to_previous_value_using_relative_date_from(
self,
):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2957,10 +2957,6 @@ def test_funnel_aggregate_by_groups_breakdown_group(self):
],
)

@also_test_with_materialized_columns(
group_properties=[(0, "industry")],
materialize_only_with_person_on_events=True,
)
@also_test_with_person_on_events_v2
@snapshot_clickhouse_queries
def test_funnel_aggregate_by_groups_breakdown_group_person_on_events(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -865,7 +865,6 @@ def test_funnel_correlation_with_properties_and_groups(self):
@also_test_with_materialized_columns(
event_properties=[],
person_properties=["$browser"],
group_properties=[(0, "industry")],
verify_no_jsonextract=False,
)
@also_test_with_person_on_events_v2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3871,11 +3871,6 @@ def test_groups_filtering(self):
],
)

@also_test_with_materialized_columns(
["$current_url", "$screen_name"],
group_properties=[(0, "industry"), (1, "industry")],
materialize_only_with_person_on_events=True,
)
@snapshot_clickhouse_queries
def test_groups_filtering_person_on_events(self):
self._create_groups()
Expand Down
Loading

0 comments on commit 26f0438

Please sign in to comment.