Skip to content

Commit

Permalink
fix: gather server timings for important changes (PostHog#19623)
Browse files Browse the repository at this point in the history
* fix: gather server timings for important changes

* zoom in

* oh, that's not safe to share :/
  • Loading branch information
pauldambra authored and jacobwgillespie committed Jan 12, 2024
1 parent 1de6a3a commit ab807fd
Showing 1 changed file with 158 additions and 113 deletions.
271 changes: 158 additions & 113 deletions posthog/api/activity_log.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Any, Optional
import time
from typing import Any, Optional, Dict

from django.db.models import Q, QuerySet

Expand Down Expand Up @@ -45,6 +46,31 @@ class ActivityLogPagination(pagination.CursorPagination):
page_size = 100


# context manager for gathering a sequence of server timings
class ServerTimingsGathered:
# Class level dictionary to store timings
timings_dict: Dict[str, float] = {}

def __call__(self, name):
self.name = name
return self

def __enter__(self):
# timings are assumed to be in milliseconds when reported
# but are gathered by time.perf_counter which is fractional seconds 🫠
# so each value is multiplied by 1000 at collection
self.start_time = time.perf_counter() * 1000

def __exit__(self, exc_type, exc_val, exc_tb):
end_time = time.perf_counter() * 1000
elapsed_time = end_time - self.start_time
ServerTimingsGathered.timings_dict[self.name] = elapsed_time

@classmethod
def get_all_timings(cls):
return cls.timings_dict


class ActivityLogViewSet(StructuredViewSetMixin, viewsets.GenericViewSet, mixins.ListModelMixin):
queryset = ActivityLog.objects.all()
serializer_class = ActivityLogSerializer
Expand Down Expand Up @@ -76,140 +102,159 @@ def important_changes(self, request: Request, *args: Any, **kwargs: Any) -> Resp
# this is for mypy
return Response(status=status.HTTP_401_UNAUTHORIZED)

# first things this user created
my_insights = list(Insight.objects.filter(created_by=user, team_id=self.team.pk).values_list("id", flat=True))
my_feature_flags = list(
FeatureFlag.objects.filter(created_by=user, team_id=self.team.pk).values_list("id", flat=True)
)
my_notebooks = list(
Notebook.objects.filter(created_by=user, team_id=self.team.pk).values_list("short_id", flat=True)
)
my_comments = list(Comment.objects.filter(created_by=user, team_id=self.team.pk).values_list("id", flat=True))

# then things they edited
interesting_changes = [
"updated",
"exported",
"sharing enabled",
"sharing disabled",
"deleted",
"commented",
]
my_changed_insights = list(
ActivityLog.objects.filter(
team_id=self.team.id,
activity__in=interesting_changes,
user_id=user.pk,
scope="Insight",
)
.exclude(item_id__in=my_insights)
.values_list("item_id", flat=True)
)
timer = ServerTimingsGathered()

my_changed_notebooks = list(
ActivityLog.objects.filter(
team_id=self.team.id,
activity__in=interesting_changes,
user_id=user.pk,
scope="Notebook",
with timer("gather_query_parts"):
# first things this user created
my_insights = list(
Insight.objects.filter(created_by=user, team_id=self.team.pk).values_list("id", flat=True)
)
my_feature_flags = list(
FeatureFlag.objects.filter(created_by=user, team_id=self.team.pk).values_list("id", flat=True)
)
my_notebooks = list(
Notebook.objects.filter(created_by=user, team_id=self.team.pk).values_list("short_id", flat=True)
)
my_comments = list(
Comment.objects.filter(created_by=user, team_id=self.team.pk).values_list("id", flat=True)
)
.exclude(item_id__in=my_notebooks)
.values_list("item_id", flat=True)
)

my_changed_feature_flags = list(
ActivityLog.objects.filter(
team_id=self.team.id,
activity__in=interesting_changes,
user_id=user.pk,
scope="FeatureFlag",
# then things they edited
interesting_changes = [
"updated",
"exported",
"sharing enabled",
"sharing disabled",
"deleted",
"commented",
]
my_changed_insights = list(
ActivityLog.objects.filter(
team_id=self.team.id,
activity__in=interesting_changes,
user_id=user.pk,
scope="Insight",
)
.exclude(item_id__in=my_insights)
.values_list("item_id", flat=True)
)
.exclude(item_id__in=my_feature_flags)
.values_list("item_id", flat=True)
)

my_changed_comments = list(
ActivityLog.objects.filter(
team_id=self.team.id,
activity__in=interesting_changes,
user_id=user.pk,
scope="Comment",
my_changed_notebooks = list(
ActivityLog.objects.filter(
team_id=self.team.id,
activity__in=interesting_changes,
user_id=user.pk,
scope="Notebook",
)
.exclude(item_id__in=my_notebooks)
.values_list("item_id", flat=True)
)
.exclude(item_id__in=my_comments)
.values_list("item_id", flat=True)
)

last_read_date = NotificationViewed.objects.filter(user=user).first()
last_read_filter = ""

if last_read_date and params.get("unread") == "true":
last_read_filter = f"AND created_at > '{last_read_date.last_viewed_activity_date.isoformat()}'"

# before we filter to include only the important changes, we need to deduplicate too frequent changes
candidate_ids = ActivityLog.objects.raw(
f"""
SELECT id
FROM (SELECT
Row_number() over (
PARTITION BY five_minute_window, activity, item_id, scope ORDER BY created_at DESC
) AS row_number,
*
FROM (
-- copied from https://stackoverflow.com/a/43028800
SELECT to_timestamp(floor(Extract(epoch FROM created_at) / extract(epoch FROM interval '5 min')) *
extract(epoch FROM interval '5 min')) AS five_minute_window,
activity, item_id, scope, id, created_at
FROM posthog_activitylog
WHERE team_id = {self.team_id}
AND NOT (user_id = {user.pk} AND user_id IS NOT NULL)
{last_read_filter}
ORDER BY created_at DESC) AS inner_q) AS counted_q
WHERE row_number = 1
"""
)
my_changed_feature_flags = list(
ActivityLog.objects.filter(
team_id=self.team.id,
activity__in=interesting_changes,
user_id=user.pk,
scope="FeatureFlag",
)
.exclude(item_id__in=my_feature_flags)
.values_list("item_id", flat=True)
)

other_peoples_changes = (
self.queryset.exclude(user=user)
.filter(team_id=self.team.id)
.filter(
Q(
Q(Q(scope="FeatureFlag") & Q(item_id__in=my_feature_flags))
| Q(Q(scope="Insight") & Q(item_id__in=my_insights))
| Q(Q(scope="Notebook") & Q(item_id__in=my_notebooks))
| Q(Q(scope="Comment") & Q(item_id__in=my_comments))
my_changed_comments = list(
ActivityLog.objects.filter(
team_id=self.team.id,
activity__in=interesting_changes,
user_id=user.pk,
scope="Comment",
)
| Q(
# don't want to see creation of these things since that was before the user edited these things
Q(activity__in=interesting_changes)
& Q(
Q(Q(scope="FeatureFlag") & Q(item_id__in=my_changed_feature_flags))
| Q(Q(scope="Insight") & Q(item_id__in=my_changed_insights))
| Q(Q(scope="Notebook") & Q(item_id__in=my_changed_notebooks))
| Q(Q(scope="Comment") & Q(item_id__in=my_changed_comments))
.exclude(item_id__in=my_comments)
.values_list("item_id", flat=True)
)

last_read_date = NotificationViewed.objects.filter(user=user).first()
last_read_filter = ""

if last_read_date and params.get("unread") == "true":
last_read_filter = f"AND created_at > '{last_read_date.last_viewed_activity_date.isoformat()}'"

with timer("query_for_candidate_ids"):
# before we filter to include only the important changes, we need to deduplicate too frequent changes
candidate_ids = ActivityLog.objects.raw(
f"""
SELECT id
FROM (SELECT
Row_number() over (
PARTITION BY five_minute_window, activity, item_id, scope ORDER BY created_at DESC
) AS row_number,
*
FROM (
-- copied from https://stackoverflow.com/a/43028800
SELECT to_timestamp(floor(Extract(epoch FROM created_at) / extract(epoch FROM interval '5 min')) *
extract(epoch FROM interval '5 min')) AS five_minute_window,
activity, item_id, scope, id, created_at
FROM posthog_activitylog
WHERE team_id = {self.team_id}
AND NOT (user_id = {user.pk} AND user_id IS NOT NULL)
{last_read_filter}
ORDER BY created_at DESC) AS inner_q) AS counted_q
WHERE row_number = 1
"""
)

with timer("construct_query"):
other_peoples_changes = (
self.queryset.exclude(user=user)
.filter(team_id=self.team.id)
.filter(
Q(
Q(Q(scope="FeatureFlag") & Q(item_id__in=my_feature_flags))
| Q(Q(scope="Insight") & Q(item_id__in=my_insights))
| Q(Q(scope="Notebook") & Q(item_id__in=my_notebooks))
| Q(Q(scope="Comment") & Q(item_id__in=my_comments))
)
| Q(
# don't want to see creation of these things since that was before the user edited these things
Q(activity__in=interesting_changes)
& Q(
Q(Q(scope="FeatureFlag") & Q(item_id__in=my_changed_feature_flags))
| Q(Q(scope="Insight") & Q(item_id__in=my_changed_insights))
| Q(Q(scope="Notebook") & Q(item_id__in=my_changed_notebooks))
| Q(Q(scope="Comment") & Q(item_id__in=my_changed_comments))
)
)
)
.filter(id__in=[c.id for c in candidate_ids])
.order_by("-created_at")
)
.filter(id__in=[c.id for c in candidate_ids])
.order_by("-created_at")
)

if last_read_date and params.get("unread") == "true":
other_peoples_changes = other_peoples_changes.filter(
created_at__gt=last_read_date.last_viewed_activity_date
)
if last_read_date and params.get("unread") == "true":
other_peoples_changes = other_peoples_changes.filter(
created_at__gt=last_read_date.last_viewed_activity_date
)

with timer("query_for_data"):
page_of_data = other_peoples_changes[:10]

serialized_data = ActivityLogSerializer(
instance=other_peoples_changes[:10], many=True, context={"user": user}
).data
with timer("serialize"):
serialized_data = ActivityLogSerializer(instance=page_of_data, many=True, context={"user": user}).data

return Response(
timings = timer.get_all_timings()

response = Response(
status=status.HTTP_200_OK,
data={
"results": serialized_data,
"last_read": last_read_date.last_viewed_activity_date if last_read_date else None,
},
)

response.headers["Server-Timing"] = ", ".join(
f"{key};dur={round(duration, ndigits=2)}" for key, duration in timings.items()
)

return response

@action(methods=["POST"], detail=False)
def bookmark_activity_notification(self, request: Request, *args: Any, **kwargs: Any) -> Response:
user = request.user
Expand Down

0 comments on commit ab807fd

Please sign in to comment.