diff --git a/frontend/__snapshots__/scenes-app-errortracking--list-page--dark.png b/frontend/__snapshots__/scenes-app-errortracking--list-page--dark.png index bef1e957a7f15..a5621ecc99e11 100644 Binary files a/frontend/__snapshots__/scenes-app-errortracking--list-page--dark.png and b/frontend/__snapshots__/scenes-app-errortracking--list-page--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-errortracking--list-page--light.png b/frontend/__snapshots__/scenes-app-errortracking--list-page--light.png index 46c495edc53bd..9bbbf1b37f239 100644 Binary files a/frontend/__snapshots__/scenes-app-errortracking--list-page--light.png and b/frontend/__snapshots__/scenes-app-errortracking--list-page--light.png differ diff --git a/frontend/src/scenes/error-tracking/ErrorTrackingConfigurationScene.tsx b/frontend/src/scenes/error-tracking/ErrorTrackingConfigurationScene.tsx index b61056d229b43..a7e105958f356 100644 --- a/frontend/src/scenes/error-tracking/ErrorTrackingConfigurationScene.tsx +++ b/frontend/src/scenes/error-tracking/ErrorTrackingConfigurationScene.tsx @@ -1,9 +1,10 @@ -import { IconTrash } from '@posthog/icons' -import { LemonButton, LemonCollapse, LemonTable, LemonTableColumns, LemonTabs } from '@posthog/lemon-ui' +import { IconRevert, IconTrash, IconUpload } from '@posthog/icons' +import { LemonButton, LemonCollapse, LemonDialog, LemonTable, LemonTableColumns, LemonTabs } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' import { stackFrameLogic } from 'lib/components/Errors/stackFrameLogic' import { ErrorTrackingSymbolSet } from 'lib/components/Errors/types' import { JSONViewer } from 'lib/components/JSONViewer' +import { humanFriendlyDetailedTime } from 'lib/utils' import { useEffect, useState } from 'react' import { SceneExport } from 'scenes/sceneTypes' @@ -43,25 +44,48 @@ const SymbolSetTable = ({ missing?: boolean }): JSX.Element => { const { symbolSetsLoading } = useValues(errorTrackingSymbolSetLogic) - const { deleteSymbolSet 
} = useActions(errorTrackingSymbolSetLogic) + const { deleteSymbolSet, setUploadSymbolSetId } = useActions(errorTrackingSymbolSetLogic) const columns: LemonTableColumns = [ { title: missing && 'Missing symbol sets', dataIndex: 'ref' }, + { title: 'Created At', dataIndex: 'created_at', render: (data) => humanFriendlyDetailedTime(data as string) }, { dataIndex: 'id', render: (_, { id }) => { return ( -
- {!missing && ( - } - onClick={() => deleteSymbolSet(id)} - className="py-1" - /> - )} +
+ : } + onClick={() => setUploadSymbolSetId(id)} + className="py-1" + > + {missing && 'Upload'} + + } + onClick={() => + LemonDialog.open({ + title: 'Delete symbol set', + description: 'Are you sure you want to delete this symbol set?', + secondaryButton: { + type: 'secondary', + children: 'Cancel', + }, + primaryButton: { + type: 'primary', + onClick: () => deleteSymbolSet(id), + children: 'Delete', + }, + }) + } + className="py-1" + />
) }, diff --git a/frontend/src/scenes/error-tracking/ErrorTrackingFilters.tsx b/frontend/src/scenes/error-tracking/ErrorTrackingFilters.tsx index 0db4cccd72302..050ef5ac7eff3 100644 --- a/frontend/src/scenes/error-tracking/ErrorTrackingFilters.tsx +++ b/frontend/src/scenes/error-tracking/ErrorTrackingFilters.tsx @@ -140,17 +140,19 @@ export const Options = ({ isGroup = false }: { isGroup?: boolean }): JSX.Element
)} - {hasGroupActions && !isGroup && ( -
- Assigned to: - { - setAssignee(user?.id || null) - }} - /> -
- )} +
+ {hasGroupActions && !isGroup && ( + <> + Assigned to: + { + setAssignee(user?.id || null) + }} + /> + + )} +
) } diff --git a/frontend/src/scenes/error-tracking/ErrorTrackingScene.tsx b/frontend/src/scenes/error-tracking/ErrorTrackingScene.tsx index e9b68fef636a1..a430e66e5fc4c 100644 --- a/frontend/src/scenes/error-tracking/ErrorTrackingScene.tsx +++ b/frontend/src/scenes/error-tracking/ErrorTrackingScene.tsx @@ -8,6 +8,7 @@ import { PageHeader } from 'lib/components/PageHeader' import { LemonTableLink } from 'lib/lemon-ui/LemonTable/LemonTableLink' import { SceneExport } from 'scenes/sceneTypes' import { urls } from 'scenes/urls' +import { userLogic } from 'scenes/userLogic' import { insightVizDataNodeKey } from '~/queries/nodes/InsightViz/InsightViz' import { Query } from '~/queries/Query/Query' @@ -164,12 +165,25 @@ const AssigneeColumn: QueryContextColumnComponent = (props) => { } const Header = (): JSX.Element => { + const { user } = useValues(userLogic) + return ( }> - Configure - + <> + {user?.is_staff ? ( + { + throw Error('Oh my!') + }} + > + Send an exception + + ) : null} + }> + Configure + + } /> ) diff --git a/frontend/src/scenes/error-tracking/SymbolSetUploadModal.tsx b/frontend/src/scenes/error-tracking/SymbolSetUploadModal.tsx index 28e7820759712..f03d80f13671d 100644 --- a/frontend/src/scenes/error-tracking/SymbolSetUploadModal.tsx +++ b/frontend/src/scenes/error-tracking/SymbolSetUploadModal.tsx @@ -7,42 +7,61 @@ import { LemonField } from 'lib/lemon-ui/LemonField' import { errorTrackingSymbolSetLogic } from './errorTrackingSymbolSetLogic' export const SymbolSetUploadModal = (): JSX.Element => { - const { setUploadSymbolSetReference } = useActions(errorTrackingSymbolSetLogic) - const { uploadSymbolSetReference, isUploadSymbolSetSubmitting, uploadSymbolSet } = - useValues(errorTrackingSymbolSetLogic) + const { setUploadSymbolSetId } = useActions(errorTrackingSymbolSetLogic) + const { uploadSymbolSetId, isUploadSymbolSetSubmitting, uploadSymbolSet } = useValues(errorTrackingSymbolSetLogic) - const onClose = (): void => setUploadSymbolSetReference(null) + 
const onClose = (): void => setUploadSymbolSetId(null) return ( - +
-

Upload source map

+

Upload javascript symbol set

- + +
- Add source map + Add minified source
- Drag and drop your local source map here or click to open the file browser. + Drag and drop your minified source file here or click to open the file browser.
} />
+ + + + Add source map + +
Drag and drop your source map here or click to open the file browser.
+ + } + /> +
Cancel ([ path(['scenes', 'error-tracking', 'errorTrackingSymbolSetLogic']), actions({ - setUploadSymbolSetReference: (ref: ErrorTrackingSymbolSet['id'] | null) => ({ ref }), + setUploadSymbolSetId: (id: ErrorTrackingSymbolSet['id'] | null) => ({ id }), }), reducers({ - uploadSymbolSetReference: [ + uploadSymbolSetId: [ null as string | null, { - setUploadSymbolSetReference: (_, { ref }) => ref, + setUploadSymbolSetId: (_, { id }) => id, }, ], }), @@ -40,10 +47,10 @@ export const errorTrackingSymbolSetLogic = kea( const response = await api.errorTracking.symbolSets() return response.results }, - deleteSymbolSet: async (ref) => { - await api.errorTracking.deleteSymbolSet(ref) + deleteSymbolSet: async (id) => { + await api.errorTracking.deleteSymbolSet(id) const newValues = [...values.symbolSets] - return newValues.filter((v) => v.ref !== ref) + return newValues.filter((v) => v.id !== id) }, }, ], @@ -70,17 +77,29 @@ export const errorTrackingSymbolSetLogic = kea( forms(({ values, actions }) => ({ uploadSymbolSet: { - defaults: { files: [] } as { files: File[] }, - submit: async ({ files }) => { - if (files.length > 0 && values.uploadSymbolSetReference) { - const formData = new FormData() - const file = files[0] - formData.append('source_map', file) - await api.errorTracking.updateSymbolSet(values.uploadSymbolSetReference, formData) - actions.setUploadSymbolSetReference(null) - actions.loadSymbolSets() - lemonToast.success('Source map uploaded') + defaults: { minified: [], sourceMap: [] } as { minified: File[]; sourceMap: File[] }, + submit: async ({ minified, sourceMap }) => { + if (minified.length < 1 || sourceMap.length < 1) { + lemonToast.error('Please select both a minified file and a source map file') + return } + + const minifiedSrc = minified[0] + const sourceMapSrc = sourceMap[0] + const id = values.uploadSymbolSetId + + if (id == null) { + return + } + + const formData = new FormData() + formData.append('minified', minifiedSrc) + 
formData.append('source_map', sourceMapSrc) + await api.errorTracking.updateSymbolSet(id, formData) + actions.setUploadSymbolSetId(null) + actions.loadSymbolSets() + actions.resetUploadSymbolSet() + lemonToast.success('Source map uploaded') }, }, })), diff --git a/posthog/api/error_tracking.py b/posthog/api/error_tracking.py index 3943c5a784e1b..9cdfd10f09122 100644 --- a/posthog/api/error_tracking.py +++ b/posthog/api/error_tracking.py @@ -1,4 +1,6 @@ +from django.core.files.uploadedfile import UploadedFile import structlog +import hashlib from rest_framework import serializers, viewsets, status from rest_framework.response import Response @@ -14,7 +16,10 @@ from posthog.storage import object_storage -FIFTY_MEGABYTES = 50 * 1024 * 1024 +ONE_GIGABYTE = 1024 * 1024 * 1024 +JS_DATA_MAGIC = b"posthog_error_tracking" +JS_DATA_VERSION = 1 +JS_DATA_TYPE_SOURCE_AND_MAP = 2 logger = structlog.get_logger(__name__) @@ -81,7 +86,7 @@ class Meta: read_only_fields = ["team_id"] -class ErrorTrackingSymbolSetViewSet(TeamAndOrgViewSetMixin, viewsets.ReadOnlyModelViewSet): +class ErrorTrackingSymbolSetViewSet(TeamAndOrgViewSetMixin, viewsets.ModelViewSet): scope_object = "INTERNAL" queryset = ErrorTrackingSymbolSet.objects.all() serializer_class = ErrorTrackingSymbolSetSerializer @@ -97,23 +102,32 @@ def destroy(self, request, *args, **kwargs): def update(self, request, *args, **kwargs) -> Response: symbol_set = self.get_object() - symbol_set.delete() # TODO: delete file from s3 - storage_ptr = upload_symbol_set(request.FILES["source_map"], self.team_id) + minified = request.FILES["minified"] + source_map = request.FILES["source_map"] + (storage_ptr, content_hash) = upload_symbol_set(minified, source_map, self.team_id) symbol_set.storage_ptr = storage_ptr + symbol_set.content_hash = content_hash symbol_set.save() + ErrorTrackingStackFrame.objects.filter(team=self.team, symbol_set=symbol_set).delete() return Response({"ok": True}, status=status.HTTP_204_NO_CONTENT) -def 
upload_symbol_set(file, team_id) -> str: +def upload_symbol_set(minified: UploadedFile, source_map: UploadedFile, team_id) -> tuple[str, str]: + js_data = construct_js_data_object(minified.read(), source_map.read()) + content_hash = hashlib.sha512(js_data).hexdigest() + try: if settings.OBJECT_STORAGE_ENABLED: - if file.size > FIFTY_MEGABYTES: - raise ValidationError(code="file_too_large", detail="Source maps must be less than 50MB") + # TODO - maybe a gigabyte is too much? + if len(js_data) > ONE_GIGABYTE: + raise ValidationError( + code="file_too_large", detail="Combined source map and symbol set must be less than 1 gigabyte" + ) upload_path = f"{settings.OBJECT_STORAGE_ERROR_TRACKING_SOURCE_MAPS_FOLDER}/{str(uuid7())}" - object_storage.write(upload_path, file) - return upload_path + object_storage.write(upload_path, bytes(js_data)) + return (upload_path, content_hash) else: raise ObjectStorageUnavailable() except ObjectStorageUnavailable: @@ -121,3 +135,19 @@ def upload_symbol_set(file, team_id) -> str: code="object_storage_required", detail="Object storage must be available to allow source map uploads.", ) + + +def construct_js_data_object(minified: bytes, source_map: bytes) -> bytearray: + # See rust/cymbal/hacks/js_data.rs + data = bytearray() + data.extend(JS_DATA_MAGIC) + data.extend(JS_DATA_VERSION.to_bytes(4, "little")) + data.extend((JS_DATA_TYPE_SOURCE_AND_MAP).to_bytes(4, "little")) + # TODO - this doesn't seem right? 
+ s_bytes = minified.decode("utf-8").encode("utf-8") + data.extend(len(s_bytes).to_bytes(8, "little")) + data.extend(s_bytes) + sm_bytes = source_map.decode("utf-8").encode("utf-8") + data.extend(len(sm_bytes).to_bytes(8, "little")) + data.extend(sm_bytes) + return data diff --git a/posthog/api/test/test_error_tracking.py b/posthog/api/test/test_error_tracking.py index 29ef7342b6efa..7086ed69c2cf8 100644 --- a/posthog/api/test/test_error_tracking.py +++ b/posthog/api/test/test_error_tracking.py @@ -101,30 +101,34 @@ def test_can_upload_a_source_map(self) -> None: ) with open(get_path_to("source.js.map"), "rb") as image: - response = self.client.put( + # Note - we just use the source map twice, because we don't expect the API to do + # any validation here - cymbal does the parsing work. + # TODO - we could have the api validate these contents before uploading, if we wanted + data = {"source_map": image, "minified": image} + response = self.client.patch( f"/api/projects/{self.team.id}/error_tracking/symbol_sets/{symbol_set.id}", - {"source_map": image}, + data, format="multipart", ) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - def test_rejects_too_large_file_type(self) -> None: - symbol_set = ErrorTrackingSymbolSet.objects.create( - ref="https://app-static-prod.posthog.com/static/chunk-BPTF6YBO.js", team=self.team, storage_ptr=None - ) - fifty_megabytes_plus_a_little = b"1" * (50 * 1024 * 1024 + 1) - fake_big_file = SimpleUploadedFile( - name="large_source.js.map", - content=fifty_megabytes_plus_a_little, - content_type="text/plain", - ) - response = self.client.put( - f"/api/projects/{self.team.id}/error_tracking/symbol_sets/{symbol_set.id}", - {"source_map": fake_big_file}, - format="multipart", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST, response.json()) - self.assertEqual(response.json()["detail"], "Source maps must be less than 50MB") + # def test_rejects_too_large_file_type(self) -> None: + # symbol_set = 
ErrorTrackingSymbolSet.objects.create( + # ref="https://app-static-prod.posthog.com/static/chunk-BPTF6YBO.js", team=self.team, storage_ptr=None + # ) + # fifty_megabytes_plus_a_little = b"1" * (1024 * 1024 * 1024 + 1) + # fake_big_file = SimpleUploadedFile( + # name="large_source.js.map", + # content=fifty_megabytes_plus_a_little, + # content_type="text/plain", + # ) + # response = self.client.put( + # f"/api/projects/{self.team.id}/error_tracking/symbol_sets/{symbol_set.id}", + # {"source_map": fake_big_file}, + # format="multipart", + # ) + # self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST, response.json()) + # self.assertEqual(response.json()["detail"], "Source maps must be less than 50MB") def test_rejects_upload_when_object_storage_is_unavailable(self) -> None: symbol_set = ErrorTrackingSymbolSet.objects.create( @@ -132,9 +136,10 @@ def test_rejects_upload_when_object_storage_is_unavailable(self) -> None: ) with override_settings(OBJECT_STORAGE_ENABLED=False): fake_big_file = SimpleUploadedFile(name="large_source.js.map", content=b"", content_type="text/plain") + data = {"source_map": fake_big_file, "minified": fake_big_file} response = self.client.put( f"/api/projects/{self.team.id}/error_tracking/symbol_sets/{symbol_set.id}", - {"source_map": fake_big_file}, + data, format="multipart", ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST, response.json()) diff --git a/posthog/migrations/0523_errortrackingsymbolset_content_hash.py b/posthog/migrations/0523_errortrackingsymbolset_content_hash.py new file mode 100644 index 0000000000000..709fff71a9edf --- /dev/null +++ b/posthog/migrations/0523_errortrackingsymbolset_content_hash.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2.15 on 2024-11-26 16:32 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("posthog", "0522_datawarehouse_salesforce_opportunity"), + ] + + operations = [ + migrations.AddField( + 
model_name="errortrackingsymbolset", + name="content_hash", + field=models.TextField(null=True), + ), + ] diff --git a/posthog/migrations/max_migration.txt b/posthog/migrations/max_migration.txt index 334dacbdf2b21..a9fe528284987 100644 --- a/posthog/migrations/max_migration.txt +++ b/posthog/migrations/max_migration.txt @@ -1 +1 @@ -0522_datawarehouse_salesforce_opportunity +0523_errortrackingsymbolset_content_hash diff --git a/posthog/models/error_tracking/error_tracking.py b/posthog/models/error_tracking/error_tracking.py index f2e3e2276658c..b003e29b3b8a5 100644 --- a/posthog/models/error_tracking/error_tracking.py +++ b/posthog/models/error_tracking/error_tracking.py @@ -56,6 +56,7 @@ class ErrorTrackingSymbolSet(UUIDModel): # If we failed to resolve this symbol set, we store the reason here, so # we can return the language-relevant error in the future. failure_reason = models.TextField(null=True, blank=True) + content_hash = models.TextField(null=True, blank=False) class Meta: indexes = [ diff --git a/rust/.sqlx/query-361eb26d51d253242a8af33e45d8686c4393f6420e61b6b141143974ff362213.json b/rust/.sqlx/query-ca484660a37a42b92f1d7bdd49635941c9c54e770e6f02b10e7f42b05ae26bae.json similarity index 73% rename from rust/.sqlx/query-361eb26d51d253242a8af33e45d8686c4393f6420e61b6b141143974ff362213.json rename to rust/.sqlx/query-ca484660a37a42b92f1d7bdd49635941c9c54e770e6f02b10e7f42b05ae26bae.json index 8ea73c5d64197..dd47187cdad03 100644 --- a/rust/.sqlx/query-361eb26d51d253242a8af33e45d8686c4393f6420e61b6b141143974ff362213.json +++ b/rust/.sqlx/query-ca484660a37a42b92f1d7bdd49635941c9c54e770e6f02b10e7f42b05ae26bae.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "SELECT id, team_id, ref as set_ref, storage_ptr, created_at, failure_reason\n FROM posthog_errortrackingsymbolset\n WHERE team_id = $1 AND ref = $2", + "query": "SELECT id, team_id, ref as set_ref, storage_ptr, created_at, failure_reason, content_hash\n FROM posthog_errortrackingsymbolset\n WHERE 
team_id = $1 AND ref = $2", "describe": { "columns": [ { @@ -32,6 +32,11 @@ "ordinal": 5, "name": "failure_reason", "type_info": "Text" + }, + { + "ordinal": 6, + "name": "content_hash", + "type_info": "Text" } ], "parameters": { @@ -46,8 +51,9 @@ false, true, false, + true, true ] }, - "hash": "361eb26d51d253242a8af33e45d8686c4393f6420e61b6b141143974ff362213" + "hash": "ca484660a37a42b92f1d7bdd49635941c9c54e770e6f02b10e7f42b05ae26bae" } diff --git a/rust/.sqlx/query-d9b607bfbdb9e24dbd66b084188b377ceedb8ae01fa623a5ce84c22813113ce4.json b/rust/.sqlx/query-d9b607bfbdb9e24dbd66b084188b377ceedb8ae01fa623a5ce84c22813113ce4.json new file mode 100644 index 0000000000000..093863912e215 --- /dev/null +++ b/rust/.sqlx/query-d9b607bfbdb9e24dbd66b084188b377ceedb8ae01fa623a5ce84c22813113ce4.json @@ -0,0 +1,20 @@ +{ + "db_name": "PostgreSQL", + "query": "INSERT INTO posthog_errortrackingsymbolset (id, team_id, ref, storage_ptr, failure_reason, created_at, content_hash)\n VALUES ($1, $2, $3, $4, $5, $6, $7)\n ON CONFLICT (team_id, ref) DO UPDATE SET storage_ptr = $4, content_hash = $7", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Uuid", + "Int4", + "Text", + "Text", + "Text", + "Timestamptz", + "Text" + ] + }, + "nullable": [] + }, + "hash": "d9b607bfbdb9e24dbd66b084188b377ceedb8ae01fa623a5ce84c22813113ce4" +} diff --git a/rust/.sqlx/query-fda1f4ef877d1a71dbb6345d71c21c0eae35356f7b92e969a12a839b41cd360a.json b/rust/.sqlx/query-fda1f4ef877d1a71dbb6345d71c21c0eae35356f7b92e969a12a839b41cd360a.json deleted file mode 100644 index da2b7d92cb961..0000000000000 --- a/rust/.sqlx/query-fda1f4ef877d1a71dbb6345d71c21c0eae35356f7b92e969a12a839b41cd360a.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "INSERT INTO posthog_errortrackingsymbolset (id, team_id, ref, storage_ptr, failure_reason, created_at)\n VALUES ($1, $2, $3, $4, $5, $6)\n ON CONFLICT (team_id, ref) DO UPDATE SET storage_ptr = $4", - "describe": { - "columns": [], - 
"parameters": { - "Left": [ - "Uuid", - "Int4", - "Text", - "Text", - "Text", - "Timestamptz" - ] - }, - "nullable": [] - }, - "hash": "fda1f4ef877d1a71dbb6345d71c21c0eae35356f7b92e969a12a839b41cd360a" -} diff --git a/rust/cymbal/src/bin/run.sh b/rust/cymbal/src/bin/run.sh index b7e63828615fc..93c93fcc8b9aa 100755 --- a/rust/cymbal/src/bin/run.sh +++ b/rust/cymbal/src/bin/run.sh @@ -3,6 +3,7 @@ export KAFKA_CONSUMER_TOPIC="exception_symbolification_events" export OBJECT_STORAGE_BUCKET="posthog" export OBJECT_STORAGE_ACCESS_KEY_ID="object_storage_root_user" export OBJECT_STORAGE_SECRET_ACCESS_KEY="object_storage_root_password" +export FRAME_CACHE_TTL_SECONDS="1" export ALLOW_INTERNAL_IPS="true" RUST_LOG=info cargo run --bin cymbal diff --git a/rust/cymbal/src/error.rs b/rust/cymbal/src/error.rs index f1343cf18fc67..98de08f5fe75c 100644 --- a/rust/cymbal/src/error.rs +++ b/rust/cymbal/src/error.rs @@ -4,6 +4,8 @@ use serde::{Deserialize, Serialize}; use thiserror::Error; use uuid::Uuid; +use crate::hack::js_data::JsDataError; + #[derive(Debug, Error)] pub enum Error { #[error(transparent)] @@ -83,6 +85,8 @@ pub enum JsResolveErr { // For redirect loops or too many redirects #[error("Redirect error while fetching: {0}")] RedirectError(String), + #[error("JSDataError: {0}")] + JSDataError(#[from] JsDataError), } #[derive(Debug, Error)] diff --git a/rust/cymbal/src/hack/js_data.rs b/rust/cymbal/src/hack/js_data.rs new file mode 100644 index 0000000000000..cb5ed93187092 --- /dev/null +++ b/rust/cymbal/src/hack/js_data.rs @@ -0,0 +1,283 @@ +use serde::{Deserialize, Serialize}; +use symbolic::sourcemapcache::{SourceMapCache, SourceMapCacheWriter}; +use thiserror::Error; + +use crate::symbol_store::sourcemap::OwnedSourceMapCache; + +// NOTE: see psothog/api/error_tracking.py +pub struct JsData { + data: Vec, + // For legacy reasons, before we has this serialisation format, + // we wrote raw SourceMapCache data to s3. 
This flag is set if + // the wrapped data here is in that format + is_raw_smc: bool, +} + +#[derive(Debug)] +enum JsDataType { + SourceAndMap = 2, + SourceMapCache = 3, +} + +#[derive(Debug, Error, Serialize, Deserialize)] +pub enum JsDataError { + #[error("invalid source map cache: {0}")] + InvalidSourceMapCache(String), + #[error("Wrong version: {0}")] + WrongVersion(u32), + #[error("Data too short, at index {0}")] + DataTooShort(u64), + #[error("Data too long, at index {0}")] + DataTooLong(u64), + #[error("Invalid magic")] + InvalidMagic, + #[error("Invalid data type: {0}")] + InvalidDataType(u32), + #[error("Invalid data type {0} for operation {1}")] + InvalidDataTypeForOperation(u32, String), + #[error("Invalid utf8, got error: {0}")] + InvalidUtf8(String), +} + +impl JsData { + const MAGIC: &'static [u8] = b"posthog_error_tracking"; + const VERSION: u32 = 1; + + pub fn from_source_and_map(s: String, sm: String) -> Self { + let mut data = Vec::with_capacity(s.len() + sm.len() + 128); + Self::add_header(&mut data, JsDataType::SourceAndMap); + data.extend_from_slice(&(s.len() as u64).to_le_bytes()); + data.extend_from_slice(s.as_bytes()); + drop(s); + data.extend_from_slice(&(sm.len() as u64).to_le_bytes()); + data.extend_from_slice(sm.as_bytes()); + drop(sm); + Self { + data, + is_raw_smc: false, + } + } + + // TODO - this path has no coverage, because its never called. 
We should add coverage, or remove it + // entirely, once we decide whether we ever want to store SMC data rather than the original minified + // source and map + pub fn from_smc(smc: SourceMapCacheWriter, len_hint: usize) -> Result { + let mut data = Vec::with_capacity(len_hint); + Self::add_header(&mut data, JsDataType::SourceMapCache); + + let len_before = data.len(); + // Reserve space for the length of the SMC + data.extend_from_slice(&0u64.to_le_bytes()); + + smc.serialize(&mut data) + .map_err(|e| JsDataError::InvalidSourceMapCache(e.to_string()))?; + + let len_after = data.len(); + let smc_len = (len_after - len_before) as u64; + data[len_before..len_before + 8].copy_from_slice(&smc_len.to_le_bytes()); + Ok(Self { + data, + is_raw_smc: false, + }) + } + + pub fn from_bytes(data: Vec) -> Result { + let maybe = Self { + data, + is_raw_smc: false, + }; + + if let Err(e) = maybe.assert_has_header() { + return maybe.try_be_a_raw_source_map_cache().ok_or(e); + } + if let Err(e) = maybe.assert_has_magic() { + return maybe.try_be_a_raw_source_map_cache().ok_or(e); + } + + let version = maybe.get_version(); + if version > Self::VERSION { + return Err(JsDataError::WrongVersion(version)); + } + + let data_type = maybe.get_data_type()?; + + match data_type { + JsDataType::SourceAndMap => { + maybe.assert_has_source_and_map()?; + } + JsDataType::SourceMapCache => { + maybe.assert_has_source_map_cache()?; + } + } + + Ok(maybe) + } + + pub fn to_bytes(self) -> Vec { + self.data + } + + pub fn to_smc(self) -> Result { + if self.is_raw_smc { + // UNWRAP: safe as flag is only set by try_be_a_raw_source_map_cache, which + // asserts this parse succeeds + return Ok(OwnedSourceMapCache::new(self.data).unwrap()); + } + + match self.get_data_type()? { + JsDataType::SourceAndMap => { + let source = std::str::from_utf8(self.get_minified_source()?) + .map_err(|e| JsDataError::InvalidUtf8(e.to_string()))?; + let map = std::str::from_utf8(self.get_sourcemap()?) 
+ .map_err(|e| JsDataError::InvalidUtf8(e.to_string()))?; + OwnedSourceMapCache::from_source_and_map(source, map) + .map_err(|e| JsDataError::InvalidSourceMapCache(e.to_string())) + } + JsDataType::SourceMapCache => { + OwnedSourceMapCache::new(self.data[Self::header_len() + 8..].to_vec()) + .map_err(|e| JsDataError::InvalidSourceMapCache(e.to_string())) + } + } + } + + fn add_header(data: &mut Vec, data_type: JsDataType) { + data.extend_from_slice(Self::MAGIC); + data.extend_from_slice(&Self::VERSION.to_le_bytes()); + data.extend_from_slice(&(data_type as u32).to_le_bytes()); + } + + fn assert_has_header(&self) -> Result<(), JsDataError> { + if self.data.len() < Self::header_len() { + return Err(JsDataError::DataTooShort(0)); + } + Ok(()) + } + + fn assert_has_magic(&self) -> Result<(), JsDataError> { + if &self.data[..Self::MAGIC.len()] != Self::MAGIC { + return Err(JsDataError::InvalidMagic); + } + Ok(()) + } + + fn get_version(&self) -> u32 { + u32::from_le_bytes( + self.data[Self::MAGIC.len()..Self::MAGIC.len() + 4] + .try_into() + .unwrap(), + ) + } + + fn get_data_type(&self) -> Result { + let data_type = u32::from_le_bytes( + self.data[Self::header_len() - 4..Self::header_len()] + .try_into() + .unwrap(), + ); + JsDataType::try_from(data_type) + } + + fn assert_has_source_and_map(&self) -> Result<(), JsDataError> { + let s_len = u64::from_le_bytes( + self.data[Self::header_len()..Self::header_len() + 8] + .try_into() + .unwrap(), + ); + + let sm_len = u64::from_le_bytes( + self.data[Self::header_len() + 8 + s_len as usize + ..Self::header_len() + 8 + s_len as usize + 8] + .try_into() + .unwrap(), + ); + + let expected_length = Self::header_len() + 16 + s_len as usize + sm_len as usize; + + if self.data.len() < expected_length { + return Err(JsDataError::DataTooShort(expected_length as u64)); + } + + if self.data.len() > expected_length { + return Err(JsDataError::DataTooLong(expected_length as u64)); + } + Ok(()) + } + + fn 
assert_has_source_map_cache(&self) -> Result<(), JsDataError> { + let smc_len = u64::from_le_bytes( + self.data[Self::header_len()..Self::header_len() + 8] + .try_into() + .unwrap(), + ); + + let expected_length = Self::header_len() + 8 + smc_len as usize; + + if self.data.len() < expected_length { + return Err(JsDataError::DataTooShort(expected_length as u64)); + } + + if self.data.len() > expected_length { + return Err(JsDataError::DataTooLong(expected_length as u64)); + } + Ok(()) + } + + fn get_minified_source(&self) -> Result<&[u8], JsDataError> { + if !matches!(self.get_data_type()?, JsDataType::SourceAndMap) { + return Err(JsDataError::InvalidDataTypeForOperation( + self.get_data_type()? as u32, + "get_minified_source".to_string(), + )); + } + let s_len = u64::from_le_bytes( + self.data[Self::header_len()..Self::header_len() + 8] + .try_into() + .unwrap(), + ); + + Ok(&self.data[Self::header_len() + 8..Self::header_len() + 8 + s_len as usize]) + } + + fn get_sourcemap(&self) -> Result<&[u8], JsDataError> { + if !matches!(self.get_data_type()?, JsDataType::SourceAndMap) { + return Err(JsDataError::InvalidDataTypeForOperation( + self.get_data_type()? 
as u32, + "get_sourcemap".to_string(), + )); + } + + let s_len = self.get_minified_source()?.len(); + let sm_len = u64::from_le_bytes( + self.data[Self::header_len() + 8 + s_len..Self::header_len() + 8 + s_len + 8] + .try_into() + .unwrap(), + ); + Ok(&self.data[Self::header_len() + 8 + s_len + 8 + ..Self::header_len() + 8 + s_len + 8 + sm_len as usize]) + } + + fn try_be_a_raw_source_map_cache(mut self) -> Option { + let test = SourceMapCache::parse(&self.data).is_ok(); + if test { + self.is_raw_smc = true; + return Some(self); + } + None + } + + const fn header_len() -> usize { + Self::MAGIC.len() + Self::VERSION.to_le_bytes().len() + std::mem::size_of::() + } +} + +impl TryFrom for JsDataType { + type Error = JsDataError; + + fn try_from(value: u32) -> Result { + match value { + 2 => Ok(JsDataType::SourceAndMap), + 3 => Ok(JsDataType::SourceMapCache), + _ => Err(JsDataError::InvalidDataType(value)), + } + } +} diff --git a/rust/cymbal/src/hack/mod.rs b/rust/cymbal/src/hack/mod.rs index 5600411578915..e04a87c2eeeb9 100644 --- a/rust/cymbal/src/hack/mod.rs +++ b/rust/cymbal/src/hack/mod.rs @@ -1,3 +1,3 @@ +pub mod js_data; pub mod kafka; - // See comment in Cargo.toml, this module exists because of bugs we're working around diff --git a/rust/cymbal/src/symbol_store/saving.rs b/rust/cymbal/src/symbol_store/saving.rs index 70ddaefa6fbb1..eed2144669a0a 100644 --- a/rust/cymbal/src/symbol_store/saving.rs +++ b/rust/cymbal/src/symbol_store/saving.rs @@ -1,6 +1,7 @@ use axum::async_trait; use chrono::{DateTime, Utc}; +use sha2::{Digest, Sha512}; use sqlx::PgPool; use tracing::{error, info}; use uuid::Uuid; @@ -35,6 +36,7 @@ pub struct SymbolSetRecord { pub storage_ptr: Option, pub failure_reason: Option, pub created_at: DateTime, + pub content_hash: Option, } // This is the "intermediate" symbol set data. 
Rather than a simple `Vec`, the saving layer @@ -75,6 +77,8 @@ impl Saving { let start = common_metrics::timing_guard(SAVE_SYMBOL_SET, &[]).label("data", "true"); // Generate a new opaque key, appending our prefix. let key = self.add_prefix(Uuid::now_v7().to_string()); + let mut content_hasher = Sha512::new(); + content_hasher.update(&data); let record = SymbolSetRecord { id: Uuid::now_v7(), @@ -83,6 +87,7 @@ impl Saving { storage_ptr: Some(key.clone()), failure_reason: None, created_at: Utc::now(), + content_hash: Some(format!("{:x}", content_hasher.finalize())), }; self.s3_client.put(&self.bucket, &key, data).await?; @@ -106,6 +111,7 @@ impl Saving { storage_ptr: None, failure_reason: Some(serde_json::to_string(&reason)?), created_at: Utc::now(), + content_hash: None, } .save(&self.pool) .await?; @@ -132,7 +138,7 @@ where info!("Fetching symbol set data for {}", set_ref); if let Some(record) = SymbolSetRecord::load(&self.pool, team_id, &set_ref).await? { if let Some(storage_ptr) = record.storage_ptr { - info!("Found symbol set data for {}", set_ref); + info!("Found s3 saved symbol set data for {}", set_ref); let data = self.s3_client.get(&self.bucket, &storage_ptr).await?; metrics::counter!(SAVED_SYMBOL_SET_LOADED).increment(1); return Ok(Saveable { @@ -227,7 +233,7 @@ impl SymbolSetRecord { { let record = sqlx::query_as!( SymbolSetRecord, - r#"SELECT id, team_id, ref as set_ref, storage_ptr, created_at, failure_reason + r#"SELECT id, team_id, ref as set_ref, storage_ptr, created_at, failure_reason, content_hash FROM posthog_errortrackingsymbolset WHERE team_id = $1 AND ref = $2"#, team_id, @@ -244,15 +250,16 @@ impl SymbolSetRecord { E: sqlx::Executor<'c, Database = sqlx::Postgres>, { sqlx::query!( - r#"INSERT INTO posthog_errortrackingsymbolset (id, team_id, ref, storage_ptr, failure_reason, created_at) - VALUES ($1, $2, $3, $4, $5, $6) - ON CONFLICT (team_id, ref) DO UPDATE SET storage_ptr = $4"#, + r#"INSERT INTO posthog_errortrackingsymbolset (id, team_id, 
ref, storage_ptr, failure_reason, created_at, content_hash) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (team_id, ref) DO UPDATE SET storage_ptr = $4, content_hash = $7"#, self.id, self.team_id, self.set_ref, self.storage_ptr, self.failure_reason, - self.created_at + self.created_at, + self.content_hash ) .execute(e) .await?; diff --git a/rust/cymbal/src/symbol_store/sourcemap.rs b/rust/cymbal/src/symbol_store/sourcemap.rs index af69a52bdca08..ab5f70169301a 100644 --- a/rust/cymbal/src/symbol_store/sourcemap.rs +++ b/rust/cymbal/src/symbol_store/sourcemap.rs @@ -8,6 +8,7 @@ use tracing::{info, warn}; use crate::{ config::Config, error::{Error, JsResolveErr}, + hack::js_data::JsData, metric_consts::{ SOURCEMAP_BODY_FETCHES, SOURCEMAP_BODY_REF_FOUND, SOURCEMAP_FETCH, SOURCEMAP_HEADER_FOUND, SOURCEMAP_NOT_FOUND, SOURCEMAP_PARSE, @@ -29,16 +30,20 @@ pub struct OwnedSourceMapCache { } impl OwnedSourceMapCache { - pub fn new(data: Vec, r: impl ToString) -> Result { + pub fn new(data: Vec) -> Result { // Pass-through parse once to assert we're given valid data, so the unwrap below // is safe. 
- SourceMapCache::parse(&data).map_err(|e| { - JsResolveErr::InvalidSourceMapCache(format!( - "Got error {} for url {}", - e, - r.to_string() - )) - })?; + SourceMapCache::parse(&data)?; + Ok(Self { data }) + } + + pub fn from_source_and_map( + source: &str, + sourcemap: &str, + ) -> Result { + let mut data = Vec::with_capacity(source.len() + sourcemap.len() + 128); + let smcw = SourceMapCacheWriter::new(source, sourcemap)?; + smcw.serialize(&mut data).unwrap(); Ok(Self { data }) } @@ -77,22 +82,11 @@ impl Fetcher for SourcemapProvider { let sourcemap = fetch_source_map(&self.client, sourcemap_url.clone()).await?; - // TOTAL GUESS at a reasonable capacity here, btw - let mut cache_bytes = Vec::with_capacity(minified_source.len() + sourcemap.len()); - - let writer = SourceMapCacheWriter::new(&minified_source, &sourcemap).map_err(|e| { - JsResolveErr::InvalidSourceMapCache(format!( - "Failed to construct sourcemap cache: {}, for sourcemap url {}", - e, sourcemap_url - )) - })?; - - // UNWRAP: writing into a vector always succeeds - writer.serialize(&mut cache_bytes).unwrap(); + let data = JsData::from_source_and_map(minified_source, sourcemap); start.label("found_data", "true").fin(); - Ok(cache_bytes) + Ok(data.to_bytes()) } } @@ -102,9 +96,11 @@ impl Parser for SourcemapProvider { type Set = OwnedSourceMapCache; async fn parse(&self, data: Vec) -> Result { let start = common_metrics::timing_guard(SOURCEMAP_PARSE, &[]); - let sm = OwnedSourceMapCache::new(data, "parse")?; + let smc = JsData::from_bytes(data) + .and_then(JsData::to_smc) + .map_err(JsResolveErr::from)?; start.label("success", "true").fin(); - Ok(sm) + Ok(smc) } } diff --git a/rust/cymbal/src/types/mod.rs b/rust/cymbal/src/types/mod.rs index 9f09e5cf83cd9..01850217127f0 100644 --- a/rust/cymbal/src/types/mod.rs +++ b/rust/cymbal/src/types/mod.rs @@ -76,8 +76,8 @@ pub struct OutputErrProps { impl Exception { pub fn include_in_fingerprint(&self, h: &mut Sha512) { h.update(self.exception_type.as_bytes()); 
- h.update(self.exception_message.as_bytes()); let Some(Stacktrace::Resolved { frames }) = &self.stack else { + h.update(self.exception_message.as_bytes()); return; }; diff --git a/rust/cymbal/tests/test_migrations/20241101134611_test_migration_for_symbol_set_saving_tests.sql b/rust/cymbal/tests/test_migrations/20241101134611_test_migration_for_symbol_set_saving_tests.sql index b9b73d3a43de7..fc3da5157f003 100644 --- a/rust/cymbal/tests/test_migrations/20241101134611_test_migration_for_symbol_set_saving_tests.sql +++ b/rust/cymbal/tests/test_migrations/20241101134611_test_migration_for_symbol_set_saving_tests.sql @@ -5,6 +5,7 @@ CREATE TABLE posthog_errortrackingsymbolset ( created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), storage_ptr TEXT, failure_reason TEXT, + content_hash TEXT, CONSTRAINT unique_ref_per_team UNIQUE (team_id, ref) );