diff --git a/plugin-server/src/worker/ingestion/person-state.ts b/plugin-server/src/worker/ingestion/person-state.ts index e5f1327895cfa..72a82a07d0aec 100644 --- a/plugin-server/src/worker/ingestion/person-state.ts +++ b/plugin-server/src/worker/ingestion/person-state.ts @@ -17,9 +17,17 @@ import { castTimestampOrNow, UUIDT } from '../../utils/utils' import { captureIngestionWarning } from './utils' const MAX_FAILED_PERSON_MERGE_ATTEMPTS = 3 + +export const mergeFinalFailuresCounter = new Counter({ + name: 'person_merge_final_failure_total', + help: 'Number of person merge final failures.', +}) + +// used to prevent identify from being used with generic IDs +// that we can safely assume stem from a bug or mistake // used to prevent identify from being used with generic IDs // that we can safely assume stem from a bug or mistake -const CASE_INSENSITIVE_ILLEGAL_IDS = new Set([ +const BARE_CASE_INSENSITIVE_ILLEGAL_IDS = [ 'anonymous', 'guest', 'distinctid', @@ -30,17 +38,34 @@ const CASE_INSENSITIVE_ILLEGAL_IDS = new Set([ 'undefined', 'true', 'false', -]) - -export const mergeFinalFailuresCounter = new Counter({ - name: 'person_merge_final_failure_total', - help: 'Number of person merge final failures.', -}) - -const CASE_SENSITIVE_ILLEGAL_IDS = new Set(['[object Object]', 'NaN', 'None', 'none', 'null', '0', 'undefined']) +] + +const BARE_CASE_SENSITIVE_ILLEGAL_IDS = ['[object Object]', 'NaN', 'None', 'none', 'null', '0', 'undefined'] + +// we have seen illegal ids received but wrapped in double quotes +// to protect ourselves from this we'll add the single- and double-quoted versions of the illegal ids +const singleQuoteIds = (ids: string[]) => ids.map((id) => `'${id}'`) +const doubleQuoteIds = (ids: string[]) => ids.map((id) => `"${id}"`) + +// some ids are illegal regardless of casing +// while others are illegal only when cased +// so, for example, we want to forbid `NaN` but not `nan` +// but, we will forbid `uNdEfInEd` and `undefined` +const CASE_INSENSITIVE_ILLEGAL_IDS = new Set( + BARE_CASE_INSENSITIVE_ILLEGAL_IDS.concat(singleQuoteIds(BARE_CASE_INSENSITIVE_ILLEGAL_IDS)).concat( + doubleQuoteIds(BARE_CASE_INSENSITIVE_ILLEGAL_IDS) + ) +) + +const CASE_SENSITIVE_ILLEGAL_IDS = new Set( + BARE_CASE_SENSITIVE_ILLEGAL_IDS.concat(singleQuoteIds(BARE_CASE_SENSITIVE_ILLEGAL_IDS)).concat( + doubleQuoteIds(BARE_CASE_SENSITIVE_ILLEGAL_IDS) + ) +) const isDistinctIdIllegal = (id: string): boolean => { - return id.trim() === '' || CASE_INSENSITIVE_ILLEGAL_IDS.has(id.toLowerCase()) || CASE_SENSITIVE_ILLEGAL_IDS.has(id) + const trimmed = id.trim() + return trimmed === '' || CASE_INSENSITIVE_ILLEGAL_IDS.has(id.toLowerCase()) || CASE_SENSITIVE_ILLEGAL_IDS.has(id) } // This class is responsible for creating/updating a single person through the process-event pipeline @@ -245,7 +270,7 @@ export class PersonState { this.teamId, this.timestamp ) - } else if (this.event.event === '$identify' && this.eventProperties['$anon_distinct_id']) { + } else if (this.event.event === '$identify' && '$anon_distinct_id' in this.eventProperties) { return await this.merge( String(this.eventProperties['$anon_distinct_id']), this.distinctId, diff --git a/plugin-server/tests/worker/ingestion/person-state.test.ts b/plugin-server/tests/worker/ingestion/person-state.test.ts index b44f60e8d2dda..66fa35976d274 100644 --- a/plugin-server/tests/worker/ingestion/person-state.test.ts +++ b/plugin-server/tests/worker/ingestion/person-state.test.ts @@ -25,17 +25,20 @@ describe('PersonState.update()', () => { let uuid2: UUIDT let teamId: number let poEEmbraceJoin: boolean + let organizationId: string beforeAll(async () => { ;[hub, closeHub] = await createHub({}) await hub.db.clickhouseQuery('SYSTEM STOP MERGES') + + organizationId = await createOrganization(hub.db.postgres) }) beforeEach(async () => { poEEmbraceJoin = false uuid = new UUIDT() uuid2 = new UUIDT() - const organizationId = await createOrganization(hub.db.postgres) + teamId = await createTeam(hub.db.postgres, organizationId) jest.spyOn(hub.db, 'fetchPerson') @@ -1078,10 +1081,11 @@ describe('PersonState.update()', () => { hub.statsd = { increment: jest.fn() } as any }) - it('stops $identify if current distinct_id is illegal', async () => { + const illegalIds = ['', ' ', 'null', 'undefined', '"undefined"', '[object Object]', '"[object Object]"'] + it.each(illegalIds)('stops $identify if current distinct_id is illegal: `%s`', async (illegalId: string) => { const person = await personState({ event: '$identify', - distinct_id: '[object Object]', + distinct_id: illegalId, properties: { $anon_distinct_id: 'anonymous_id', }, @@ -1092,16 +1096,16 @@ describe('PersonState.update()', () => { expect(persons.length).toEqual(0) expect(hub.statsd!.increment).toHaveBeenCalledWith('illegal_distinct_ids.total', { - distinctId: '[object Object]', + distinctId: illegalId, }) }) - it('stops $identify if $anon_distinct_id is illegal', async () => { + it.each(illegalIds)('stops $identify if $anon_distinct_id is illegal: `%s`', async (illegalId: string) => { const person = await personState({ event: '$identify', distinct_id: 'some_distinct_id', properties: { - $anon_distinct_id: 'undefined', + $anon_distinct_id: illegalId, }, }).handleIdentifyOrAlias() @@ -1110,7 +1114,7 @@ describe('PersonState.update()', () => { expect(persons.length).toEqual(0) expect(hub.statsd!.increment).toHaveBeenCalledWith('illegal_distinct_ids.total', { - distinctId: 'undefined', + distinctId: illegalId, }) })