Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: only send product_intent events if not activated #25889

Merged
merged 20 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions frontend/src/scenes/teamLogic.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ export const teamLogic = kea<teamLogicType>([
return await api.create(`api/projects/${values.currentProject.id}/environments/`, { name, is_demo })
},
resetToken: async () => await api.update(`api/environments/${values.currentTeamId}/reset_token`, {}),
/**
* If adding a product intent that also represents regular product usage, see explainer in posthog.models.product_intent.product_intent.py.
*/
addProductIntent: async ({
product_type,
intent_context,
Expand Down
11 changes: 8 additions & 3 deletions posthog/api/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,12 @@
from posthog.models.async_deletion import AsyncDeletion, DeletionType
from posthog.models.group_type_mapping import GroupTypeMapping
from posthog.models.organization import OrganizationMembership
from posthog.models.scopes import APIScopeObjectOrNotSupported
from posthog.models.product_intent.product_intent import ProductIntent
from posthog.models.product_intent.product_intent import (
ProductIntent,
calculate_product_activation,
)
from posthog.models.project import Project
from posthog.models.scopes import APIScopeObjectOrNotSupported
from posthog.models.signals import mute_selected_signals
from posthog.models.team.util import delete_batch_exports, delete_bulky_postgres_data
from posthog.models.utils import UUIDT
Expand Down Expand Up @@ -199,6 +202,7 @@ def get_live_events_token(self, project: Project) -> Optional[str]:
def get_product_intents(self, obj):
    """Return the product intents stored for the project's passthrough team.

    Before querying, enqueues `calculate_product_activation` for that team
    (with `only_calc_if_days_since_last_checked=1`).

    Returns:
        A values queryset exposing `product_type`, `created_at`,
        `onboarding_completed_at` and `updated_at` for each intent.
    """
    team = obj.passthrough_team
    # Enqueue by primary key rather than by model instance: Celery serializes
    # task arguments, and a model instance is not JSON-serializable (JSON is
    # Celery's default serializer). The task filters with `team=<value>`,
    # which Django resolves identically for a pk and for an instance.
    calculate_product_activation.delay(team.pk, only_calc_if_days_since_last_checked=1)
    return ProductIntent.objects.filter(team=team).values(
        "product_type", "created_at", "onboarding_completed_at", "updated_at"
    )
Expand Down Expand Up @@ -575,10 +579,11 @@ def add_product_intent(self, request: request.Request, *args, **kwargs):

product_intent, created = ProductIntent.objects.get_or_create(team=team, product_type=product_type)
if not created:
product_intent.check_and_update_activation()
product_intent.updated_at = datetime.now(tz=UTC)
product_intent.save()

if isinstance(user, User):
if isinstance(user, User) and not product_intent.activated_at:
report_user_action(
user,
"user showed product intent",
Expand Down
7 changes: 5 additions & 2 deletions posthog/api/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
from posthog.models.async_deletion import AsyncDeletion, DeletionType
from posthog.models.group_type_mapping import GroupTypeMapping
from posthog.models.organization import OrganizationMembership
from posthog.models.scopes import APIScopeObjectOrNotSupported
from posthog.models.product_intent.product_intent import calculate_product_activation
from posthog.models.project import Project
from posthog.models.scopes import APIScopeObjectOrNotSupported
from posthog.models.signals import mute_selected_signals
from posthog.models.team.util import delete_batch_exports, delete_bulky_postgres_data
from posthog.models.utils import UUIDT
Expand Down Expand Up @@ -215,6 +216,7 @@ def get_live_events_token(self, team: Team) -> Optional[str]:
)

def get_product_intents(self, obj):
    """Return the product intents stored for the given team.

    Before querying, enqueues `calculate_product_activation` for the team
    (with `only_calc_if_days_since_last_checked=1`).

    Returns:
        A values queryset exposing `product_type`, `created_at`,
        `onboarding_completed_at` and `updated_at` for each intent.
    """
    # Enqueue by primary key rather than by model instance: Celery serializes
    # task arguments, and a model instance is not JSON-serializable (JSON is
    # Celery's default serializer). The task filters with `team=<value>`,
    # which Django resolves identically for a pk and for an instance.
    calculate_product_activation.delay(obj.pk, only_calc_if_days_since_last_checked=1)
    return ProductIntent.objects.filter(team=obj).values(
        "product_type", "created_at", "onboarding_completed_at", "updated_at"
    )
Expand Down Expand Up @@ -585,10 +587,11 @@ def add_product_intent(self, request: request.Request, *args, **kwargs):

product_intent, created = ProductIntent.objects.get_or_create(team=team, product_type=product_type)
if not created:
product_intent.check_and_update_activation()
product_intent.updated_at = datetime.now(tz=UTC)
product_intent.save()

if isinstance(user, User):
if isinstance(user, User) and not product_intent.activated_at:
report_user_action(
user,
"user showed product intent",
Expand Down
97 changes: 97 additions & 0 deletions posthog/models/product_intent/product_intent.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,37 @@
from datetime import UTC, datetime

from celery import shared_task
from django.db import models

from posthog.event_usage import report_team_action
from posthog.models.insight import Insight
from posthog.models.team.team import Team
from posthog.models.utils import UUIDModel
from posthog.utils import get_instance_realm

"""
How to use this model:

Product intents are indicators that someone showed an interest in a given product.
They are triggered from the frontend when the user performs certain actions, like
selecting a product during onboarding or clicking on a certain button.

Some buttons that show product intent are frequently used by all users of the product,
so we need to know if it's a new product intent, or if it's just regular usage. We
can use the `activated_at` field to know if we should continue to update the product
intent row, or if we should stop because it's just regular usage.

The `activated_at` field is set by checking against certain criteria that differ for
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is clever.

each product. For instance, for the data warehouse product, we check if the user has
created any DataVisualizationNode insights in the 30 days after the product intent
was created. Each product needs to implement a method that checks for activation
criteria if the intent actions are the same as the general usage actions.

We shouldn't use this model and the `activated_at` field in place of sending events
about product usage because that limits our data exploration later. Definitely continue
sending events for product usage that we may want to track for any reason, along with
calculating activation here.
"""


class ProductIntent(UUIDModel):
Expand All @@ -9,9 +40,75 @@ class ProductIntent(UUIDModel):
updated_at = models.DateTimeField(auto_now=True)
product_type = models.CharField(max_length=255)
onboarding_completed_at = models.DateTimeField(null=True, blank=True)
activated_at = models.DateTimeField(
null=True,
blank=True,
help_text="The date the org completed activation for the product. Generally only used to know if we should continue updating the product_intent row.",
)
activation_last_checked_at = models.DateTimeField(
null=True,
blank=True,
help_text="The date we last checked if the org had completed activation for the product.",
)

class Meta:
unique_together = ["team", "product_type"]

def __str__(self):
    """Return "<team name> - <product type>" as the text form of this intent."""
    team_name = self.team.name
    product = self.product_type
    return f"{team_name} - {product}"

def has_activated_data_warehouse(self) -> bool:
insights = Insight.objects.filter(
team=self.team,
created_at__gte=datetime(2024, 6, 1, tzinfo=UTC),
query__kind="DataVisualizationNode",
)

excluded_tables = ["events", "persons", "sessions", "person_distinct_ids"]
for insight in insights:
if insight.query and insight.query.get("source", {}).get("query"):
query_text = insight.query["source"]["query"].lower()
# Check if query doesn't contain any of the excluded tables after 'from'
has_excluded_table = any(f"from {table}" in query_text.replace("\\", "") for table in excluded_tables)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this part. Why are these tables excluded? Are they just the stock ones provided by PostHog? So if not excluded, then it's assumed to be one of their tables?

This could be optimized so it's all done in the query:

    excluded_tables = ["events", "persons", "sessions", "person_distinct_ids"]
    
    # Create a list of Q objects for each excluded table
    excluded_patterns = [
        Q(query__source__query__icontains=f"from {table}") 
        for table in excluded_tables
    ]
    
    # Combine all exclusion patterns with OR operator
    exclusion_filter = reduce(operator.or_, excluded_patterns)
    
    # Single optimized query that:
    # 1. Filters by team and date
    # 2. Filters by query kind
    # 3. Excludes queries containing any excluded table
    # 4. Checks if any matching record exists
    return Insight.objects.filter(
        team=self.team,
        created_at__gte=datetime(2024, 6, 1, tzinfo=UTC),
        query__kind="DataVisualizationNode",
    ).exclude(
        exclusion_filter
    ).filter(
        query__source__query__isnull=False
    ).exists()

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This query is what Eric gave me for data warehouse activation. I'll look at your optimization! I haven't tested that this works at all yet haha.

if not has_excluded_table:
return True

return False

def check_and_update_activation(self) -> None:
    """Run the activation check for this intent and persist the outcome.

    Every call stamps `activation_last_checked_at`, which is the value
    `calculate_product_activation` reads to decide whether a re-check is due.
    Without that stamp the field stays empty and the throttle never applies.

    Only the "data_warehouse" product has an activation check here. When it
    passes, `activated_at` is set, the row is saved and the activation is
    reported.

    NOTE(review): the non-activated path saves with `update_fields`, which
    assumes the row already exists in the database - true for the callers
    visible in this change; confirm for any new caller.
    """
    now = datetime.now(tz=UTC)
    self.activation_last_checked_at = now

    if self.product_type == "data_warehouse" and self.has_activated_data_warehouse():
        self.activated_at = now
        self.save()
        self.report_activation("data_warehouse")
        return

    # Persist only the check timestamp: a full save() would also refresh the
    # auto_now `updated_at` column even though the intent itself did not change.
    self.save(update_fields=["activation_last_checked_at"])

def report_activation(self, product_key: str) -> None:
    """Send the "product intent marked activated" action for this intent.

    Args:
        product_key: Product identifier included in the reported properties.
    """
    # NOTE(review): the team's organization is handed to `report_team_action`;
    # confirm that helper accepts an organization rather than a team.
    organization = self.team.organization
    properties = {
        "product_key": product_key,
        "intent_created_at": self.created_at,
        "intent_updated_at": self.updated_at,
        "realm": get_instance_realm(),
    }
    report_team_action(organization, "product intent marked activated", properties)


@shared_task(ignore_result=True)
def calculate_product_activation(team: Team, only_calc_if_days_since_last_checked: int = 1) -> None:
    """
    Re-run the activation check for each not-yet-activated product intent of a team.

    An intent is skipped when it already has `activated_at` set, or when it has an
    `activation_last_checked_at` whose age in whole days (`timedelta.days`) is less
    than or equal to `only_calc_if_days_since_last_checked`.

    NOTE(review): `team` is used only as a `filter(team=...)` value; confirm that
    whatever callers pass through `.delay()` is encodable by the configured Celery
    serializer.
    """
    for intent in ProductIntent.objects.filter(team=team):
        if intent.activated_at:
            # Already activated - nothing left to calculate.
            continue

        last_checked = intent.activation_last_checked_at
        if last_checked:
            whole_days_since_check = (datetime.now(tz=UTC) - last_checked).days
            if whole_days_since_check <= only_calc_if_days_since_last_checked:
                # Checked recently enough; wait for a later run.
                continue

        intent.check_and_update_activation()
Loading