diff --git a/plugin-server/src/utils/db/utils.ts b/plugin-server/src/utils/db/utils.ts index acc7fbf09ae4a..933921667db82 100644 --- a/plugin-server/src/utils/db/utils.ts +++ b/plugin-server/src/utils/db/utils.ts @@ -15,7 +15,7 @@ import { TimestampFormat, } from '../../types' import { status } from '../../utils/status' -import { castTimestampOrNow } from '../../utils/utils' +import { areMapsEqual, castTimestampOrNow } from '../../utils/utils' export function unparsePersonPartial(person: Partial): Partial { return { ...(person as BasePerson), ...(person.created_at ? { created_at: person.created_at.toISO() } : {}) } @@ -50,26 +50,10 @@ export function timeoutGuard( } }, timeout) } - -// when changing this set, be sure to update the frontend as well (taxonomy.tsx (eventToPersonProperties)) -export const eventToPersonProperties = new Set([ - // mobile params - '$app_build', - '$app_name', - '$app_namespace', - '$app_version', - // web params - '$browser', - '$browser_version', - '$device_type', - '$current_url', - '$pathname', - '$os', - '$os_name', // $os_name is a special case, it's treated as an alias of $os! - '$os_version', - '$referring_domain', - '$referrer', - // campaign params - automatically added by posthog-js here https://github.com/PostHog/posthog-js/blob/master/src/utils/event-utils.ts +// When changing this set, make sure you also make the same changes in: +// - taxonomy.tsx (CAMPAIGN_PROPERTIES) +// - posthog-js event-utils.ts (CAMPAIGN_PARAMS) +export const campaignParams = new Set([ 'utm_source', 'utm_medium', 'utm_campaign', @@ -91,6 +75,31 @@ export const eventToPersonProperties = new Set([ 'ttclid', // tiktok 'rdt_cid', // reddit ]) + +export const initialCampaignParams = new Set(Array.from(campaignParams, (key) => `$initial_${key.replace('$', '')}`)) + +// When changing this set, make sure you also make the same changes in: +// - taxonomy.tsx (PERSON_PROPERTIES_ADAPTED_FROM_EVENT) +export const eventToPersonProperties = new Set([ + // mobile params + '$app_build', + '$app_name', + '$app_namespace', + '$app_version', + // web params + '$browser', + '$browser_version', + '$device_type', + '$current_url', + '$pathname', + '$os', + '$os_name', // $os_name is a special case, it's treated as an alias of $os! + '$os_version', + '$referring_domain', + '$referrer', + + ...campaignParams, +]) export const initialEventToPersonProperties = new Set( Array.from(eventToPersonProperties, (key) => `$initial_${key.replace('$', '')}`) ) @@ -133,11 +142,38 @@ export function personInitialAndUTMProperties(properties: Properties): Propertie return propertiesCopy } -export function hasSetOrSetOnceInitialEventToPersonProperty(properties: Properties): boolean { - return ( - Object.keys(properties.$set || {}).some((key) => initialEventToPersonProperties.has(key)) || - Object.keys(properties.$set_once || {}).some((key) => initialEventToPersonProperties.has(key)) +export function hasDifferenceWithProposedNewNormalisationMode(properties: Properties): boolean { + // this functions checks if there would be a difference in the properties if we strip the initial campaign params + // when any $set_once initial eventToPersonProperties are present. This will often return true for events from + // posthog-js, but it is unknown if this will be the case for other SDKs. + if ( + !properties.$set_once || + !Object.keys(properties.$set_once).some((key) => initialEventToPersonProperties.has(key)) + ) { + return false + } + + const propertiesForPerson: [string, any][] = Object.entries(properties).filter(([key]) => + eventToPersonProperties.has(key) ) + + const maybeSetOnce: [string, any][] = propertiesForPerson.map(([key, value]) => [ + `$initial_${key.replace('$', '')}`, + value, + ]) + + if (maybeSetOnce.length === 0) { + return false + } + + const filteredMayBeSetOnce = maybeSetOnce.filter(([key]) => !initialCampaignParams.has(key)) + + const setOnce = new Map(Object.entries({ ...Object.fromEntries(maybeSetOnce), ...(properties.$set_once || {}) })) + const filteredSetOnce = new Map( + Object.entries({ ...Object.fromEntries(filteredMayBeSetOnce), ...(properties.$set_once || {}) }) + ) + + return !areMapsEqual(setOnce, filteredSetOnce) } export function generateKafkaPersonUpdateMessage(person: InternalPerson, isDeleted = false): ProducerRecord { diff --git a/plugin-server/src/utils/event.ts b/plugin-server/src/utils/event.ts index 95652a604e882..0d3f59bd8a4d7 100644 --- a/plugin-server/src/utils/event.ts +++ b/plugin-server/src/utils/event.ts @@ -5,7 +5,11 @@ import { Counter } from 'prom-client' import { setUsageInNonPersonEventsCounter } from '../main/ingestion-queues/metrics' import { ClickHouseEvent, HookPayload, PipelineEvent, PostIngestionEvent, RawClickHouseEvent } from '../types' import { chainToElements } from './db/elements-chain' -import { hasSetOrSetOnceInitialEventToPersonProperty, personInitialAndUTMProperties, sanitizeString } from './db/utils' +import { + hasDifferenceWithProposedNewNormalisationMode, + personInitialAndUTMProperties, + sanitizeString, +} from './db/utils' import { clickHouseTimestampSecondPrecisionToISO, clickHouseTimestampToDateTime, @@ -21,9 +25,9 @@ const KNOWN_SET_EVENTS = new Set([ 'survey sent', ]) -const RAW_INITIAL_EVENT_TO_PERSON_PROPERTY_COUNTER = new Counter({ - name: 'raw_set_once_initial_event_to_person_property', - help: 'Counter for events that have a $set_once.$initial_X property where X is an event-to-person property', +const DIFFERENCE_WITH_PROPOSED_NORMALISATION_MODE_COUNTER = new Counter({ + name: 'difference_with_proposed_normalisation_mode', + help: 'Counter for events that would give a different result with the new proposed normalisation mode', labelNames: ['library'], }) @@ -206,8 +210,8 @@ export function normalizeEvent(event: T): // For safety while PluginEvent still has an `ip` field event.ip = null - if (hasSetOrSetOnceInitialEventToPersonProperty(properties)) { - RAW_INITIAL_EVENT_TO_PERSON_PROPERTY_COUNTER.labels({ + if (hasDifferenceWithProposedNewNormalisationMode(properties)) { + DIFFERENCE_WITH_PROPOSED_NORMALISATION_MODE_COUNTER.labels({ library: getKnownLibValueOrSentinel(properties['$lib']), }).inc() } diff --git a/plugin-server/src/utils/utils.ts b/plugin-server/src/utils/utils.ts index bd606072170a0..215b2588c1c7d 100644 --- a/plugin-server/src/utils/utils.ts +++ b/plugin-server/src/utils/utils.ts @@ -675,3 +675,16 @@ export const getKnownLibValueOrSentinel = (lib: string): string => { } return '$other' } + +// Check if 2 maps with primitive values are equal +export const areMapsEqual = (map1: Map, map2: Map): boolean => { + if (map1.size !== map2.size) { + return false + } + for (const [key, value] of map1) { + if (!map2.has(key) || map2.get(key) !== value) { + return false + } + } + return true +}