From 088399bbe017118aa31168e3013db9c943530716 Mon Sep 17 00:00:00 2001 From: David Newell Date: Tue, 19 Mar 2024 16:02:07 +0000 Subject: [PATCH] feat: error clustering UI (#20958) --- ee/session_recordings/ai/error_clustering.py | 40 +++-- .../errors/SessionRecordingErrors.tsx | 158 ++++++++++++------ .../playlist/SessionRecordingPreview.tsx | 4 +- frontend/src/types.ts | 7 +- .../session_recording_api.py | 2 +- 5 files changed, 142 insertions(+), 69 deletions(-) diff --git a/ee/session_recordings/ai/error_clustering.py b/ee/session_recordings/ai/error_clustering.py index 7936297ce1e43..03ea4f62d2789 100644 --- a/ee/session_recordings/ai/error_clustering.py +++ b/ee/session_recordings/ai/error_clustering.py @@ -1,9 +1,11 @@ from prometheus_client import Histogram from django.conf import settings from posthog.clickhouse.client import sync_execute -from posthog.models.team import Team +from posthog.models import Team, User from sklearn.cluster import DBSCAN import pandas as pd +import numpy as np +from posthog.session_recordings.models.session_recording_event import SessionRecordingViewed CLUSTER_REPLAY_ERRORS_TIMING = Histogram( "posthog_session_recordings_cluster_replay_errors", @@ -22,7 +24,7 @@ DBSCAN_MIN_SAMPLES = settings.REPLAY_EMBEDDINGS_CLUSTERING_DBSCAN_MIN_SAMPLES -def error_clustering(team: Team): +def error_clustering(team: Team, user: User): results = fetch_error_embeddings(team.pk) if not results: @@ -34,7 +36,7 @@ def error_clustering(team: Team): CLUSTER_REPLAY_ERRORS_CLUSTER_COUNT.labels(team_id=team.pk).observe(df["cluster"].nunique()) - return construct_response(df) + return construct_response(df, team, user) def fetch_error_embeddings(team_id: int): @@ -64,13 +66,25 @@ def cluster_embeddings(embeddings): return dbscan.labels_ -def construct_response(df): - return [ - { - "cluster": cluster, - "samples": rows.head(n=DBSCAN_MIN_SAMPLES)[["session_id", "input"]].to_dict("records"), - "occurrences": rows.size, - "unique_sessions": rows["session_id"].count(), - } - for cluster, rows in df.groupby("cluster") - ] +def construct_response(df: pd.DataFrame, team: Team, user: User): + viewed_session_ids = list( + SessionRecordingViewed.objects.filter(team=team, user=user, session_id__in=df["session_id"].unique()) + .values_list("session_id", flat=True) + .distinct() + ) + + clusters = [] + for cluster, rows in df.groupby("cluster"): + session_ids = rows["session_id"].unique() + sample = rows.sample(n=1)[["session_id", "input"]].rename(columns={"input": "error"}).to_dict("records") + clusters.append( + { + "cluster": cluster, + "sample": sample, + "session_ids": session_ids, + "occurrences": rows.size, + "unique_sessions": len(session_ids), + "viewed": len(np.intersect1d(session_ids, viewed_session_ids, assume_unique=True)), + } + ) + return clusters diff --git a/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx b/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx index 6ff2bd792ab72..56eac1213d91a 100644 --- a/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx +++ b/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx @@ -1,13 +1,18 @@ import { IconFeatures } from '@posthog/icons' -import { LemonButton, LemonCollapse, Spinner } from '@posthog/lemon-ui' +import { LemonButton, LemonTable, LemonTabs, Spinner } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' +import { JSONViewer } from 'lib/components/JSONViewer' +import { useState } from 'react' import { urls } from 'scenes/urls' -import { ErrorClusterSample } from '~/types' - +import { SessionPlayerModal } from '../player/modal/SessionPlayerModal' +import { sessionPlayerModalLogic } from '../player/modal/sessionPlayerModalLogic' import { sessionRecordingErrorsLogic } from './sessionRecordingErrorsLogic' +const MAX_TITLE_LENGTH = 75 + export function SessionRecordingErrors(): JSX.Element { + const { openSessionPlayer } = useActions(sessionPlayerModalLogic) const { errors, errorsLoading } = useValues(sessionRecordingErrorsLogic) const { loadErrorClusters } = useActions(sessionRecordingErrorsLogic) @@ -24,57 +29,112 @@ export function SessionRecordingErrors(): JSX.Element { } return ( - ({ - key: error.cluster, - header: ( - - ), - content: , - }))} - /> + <> + { + const displayTitle = parseTitle(cluster.sample.error) + return ( +
+ {displayTitle} +
+ ) + }, + width: '50%', + }, + { + title: 'Occurrences', + dataIndex: 'occurrences', + sorter: (a, b) => a.occurrences - b.occurrences, + }, + { + title: 'Sessions', + dataIndex: 'unique_sessions', + sorter: (a, b) => a.unique_sessions - b.unique_sessions, + }, + { + title: 'Viewed', + tooltip: "How many of these you've already viewed", + dataIndex: 'viewed', + render: function Render(_, cluster) { + return `${((cluster.viewed / cluster.unique_sessions) * 100).toFixed(0)}%` + }, + sorter: (a, b) => a.viewed / a.unique_sessions - b.viewed / b.unique_sessions, + }, + { + title: 'Actions', + render: function Render(_, cluster) { + return ( + { + e.preventDefault() + openSessionPlayer({ id: cluster.sample.session_id }) + }} + className="p-2 whitespace-nowrap" + type="primary" + > + Watch example + + ) + }, + }, + ]} + dataSource={errors} + expandable={{ expandedRowRender: (cluster) => }} + /> + + ) } -const ErrorPanelHeader = ({ - occurrenceCount, - sessionCount, - example, -}: { - occurrenceCount: number - sessionCount: number - example: ErrorClusterSample -}): JSX.Element => { - return ( -
- {example.input} -
- - {occurrenceCount} occurrences / {sessionCount} sessions - - - Watch recording - -
+const ExpandedError = ({ error }: { error: string }): JSX.Element => { + const hasJson = isJSON(error) + const [activeTab, setActiveTab] = useState(hasJson ? 'json' : 'raw') + + return hasJson ? ( +
+ , + }, + { key: 'raw', label: 'Raw', content: {error} }, + ]} + /> +
+ ) : ( +
+

Example error

+
{error}
) } -const ErrorPanelContent = ({ samples }: { samples: ErrorClusterSample[] }): JSX.Element => { - return ( -
- {samples.map((error) => ( -
- {error.input} - - Watch recording - -
- ))} -
- ) +function isJSON(str: string): boolean { + try { + JSON.parse(str) + return true + } catch { + return false + } +} + +function parseTitle(error: string): string { + let input + try { + const parsedError = JSON.parse(error) + input = parsedError.error || error + } catch { + input = error + } + + return input.split('\n')[0].trim().substring(0, MAX_TITLE_LENGTH) } diff --git a/frontend/src/scenes/session-recordings/playlist/SessionRecordingPreview.tsx b/frontend/src/scenes/session-recordings/playlist/SessionRecordingPreview.tsx index 0a234d567ff8e..0ce2a9213ebe8 100644 --- a/frontend/src/scenes/session-recordings/playlist/SessionRecordingPreview.tsx +++ b/frontend/src/scenes/session-recordings/playlist/SessionRecordingPreview.tsx @@ -237,8 +237,8 @@ function PinnedIndicator(): JSX.Element | null { ) } -function ViewedIndicator(props: { viewed: boolean }): JSX.Element | null { - return !props.viewed ? ( +function ViewedIndicator({ viewed }: { viewed: boolean }): JSX.Element | null { + return !viewed ? (
diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 74d8ca7db1748..9d53e22e4008f 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -901,13 +901,12 @@ export interface SessionRecordingsResponse { has_next: boolean } -export type ErrorClusterSample = { session_id: string; input: string } - -type ErrorCluster = { +export type ErrorCluster = { cluster: number - samples: ErrorClusterSample[] + sample: { session_id: string; error: string } occurrences: number unique_sessions: number + viewed: number } export type ErrorClusterResponse = ErrorCluster[] | null diff --git a/posthog/session_recordings/session_recording_api.py b/posthog/session_recordings/session_recording_api.py index c784d737c822a..a708cb0f77792 100644 --- a/posthog/session_recordings/session_recording_api.py +++ b/posthog/session_recordings/session_recording_api.py @@ -603,7 +603,7 @@ def error_clusters(self, request: request.Request, **kwargs): raise exceptions.ValidationError("clustered errors is not enabled for this user") # Clustering will eventually be done during a scheduled background task - clusters = error_clustering(self.team) + clusters = error_clustering(self.team, user) if clusters: cache.set(cache_key, clusters, timeout=30)