Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add another illegal id #17325

Merged
merged 9 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 23 additions & 10 deletions plugin-server/src/worker/ingestion/person-state.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,22 @@ import { castTimestampOrNow, UUIDT } from '../../utils/utils'
import { captureIngestionWarning } from './utils'

const MAX_FAILED_PERSON_MERGE_ATTEMPTS = 3

export const mergeFinalFailuresCounter = new Counter({
name: 'person_merge_final_failure_total',
help: 'Number of person merge final failures.',
})

// used to prevent identify from being used with generic IDs
// that we can safely assume stem from a bug or mistake
const CASE_INSENSITIVE_ILLEGAL_IDS = new Set([
const BASE_ILLEGAL_IDS = [
'',
'[object Object]'.toLowerCase(),
'NaN'.toLowerCase(),
'none',
'null',
'0',
'undefined',
'anonymous',
'guest',
'distinctid',
Expand All @@ -30,17 +43,17 @@ const CASE_INSENSITIVE_ILLEGAL_IDS = new Set([
'undefined',
'true',
'false',
])

export const mergeFinalFailuresCounter = new Counter({
name: 'person_merge_final_failure_total',
help: 'Number of person merge final failures.',
})
]
// we have seen illegal ids arrive wrapped in double quotes (e.g. `"undefined"`);
// to protect ourselves from this we also reject the single- and double-quoted versions of each illegal id
const SINGLE_QUOTED_ILLEGAL_IDS = BASE_ILLEGAL_IDS.map((id) => `'${id}'`)
const DOUBLE_QUOTED_ILLEGAL_IDS = BASE_ILLEGAL_IDS.map((id) => `"${id}"`)

const CASE_SENSITIVE_ILLEGAL_IDS = new Set(['[object Object]', 'NaN', 'None', 'none', 'null', '0', 'undefined'])
export const ILLEGAL_IDS = new Set(BASE_ILLEGAL_IDS.concat(SINGLE_QUOTED_ILLEGAL_IDS).concat(DOUBLE_QUOTED_ILLEGAL_IDS))

const isDistinctIdIllegal = (id: string): boolean => {
return id.trim() === '' || CASE_INSENSITIVE_ILLEGAL_IDS.has(id.toLowerCase()) || CASE_SENSITIVE_ILLEGAL_IDS.has(id)
const trimmed = id.trim()
return trimmed === '' || ILLEGAL_IDS.has(trimmed.toLowerCase())
pauldambra marked this conversation as resolved.
Show resolved Hide resolved
}

// This class is responsible for creating/updating a single person through the process-event pipeline
Expand Down Expand Up @@ -245,7 +258,7 @@ export class PersonState {
this.teamId,
this.timestamp
)
} else if (this.event.event === '$identify' && this.eventProperties['$anon_distinct_id']) {
} else if (this.event.event === '$identify' && '$anon_distinct_id' in this.eventProperties) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

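Testing every illegal id showed that, even though we check for the empty string, this branch could never be reached when `$anon_distinct_id` was the empty string: the previous truthiness check treated `''` as falsy, so the condition now uses `in` to test for the property's presence instead.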

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return await this.merge(
String(this.eventProperties['$anon_distinct_id']),
this.distinctId,
Expand Down
38 changes: 30 additions & 8 deletions plugin-server/tests/worker/ingestion/person-state.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { createHub } from '../../../src/utils/db/hub'
import { PostgresUse } from '../../../src/utils/db/postgres'
import { defaultRetryConfig } from '../../../src/utils/retries'
import { UUIDT } from '../../../src/utils/utils'
import { ageInMonthsLowCardinality, PersonState } from '../../../src/worker/ingestion/person-state'
import { ageInMonthsLowCardinality, ILLEGAL_IDS, PersonState } from '../../../src/worker/ingestion/person-state'
import { delayUntilEventIngested } from '../../helpers/clickhouse'
import { createOrganization, createTeam, fetchPostgresPersons, insertRow } from '../../helpers/sql'

Expand All @@ -25,17 +25,20 @@ describe('PersonState.update()', () => {
let uuid2: UUIDT
let teamId: number
let poEEmbraceJoin: boolean
let organizationId: string

beforeAll(async () => {
;[hub, closeHub] = await createHub({})
await hub.db.clickhouseQuery('SYSTEM STOP MERGES')

organizationId = await createOrganization(hub.db.postgres)
pauldambra marked this conversation as resolved.
Show resolved Hide resolved
})

beforeEach(async () => {
poEEmbraceJoin = false
uuid = new UUIDT()
uuid2 = new UUIDT()
const organizationId = await createOrganization(hub.db.postgres)

teamId = await createTeam(hub.db.postgres, organizationId)

jest.spyOn(hub.db, 'fetchPerson')
Expand Down Expand Up @@ -1078,10 +1081,11 @@ describe('PersonState.update()', () => {
hub.statsd = { increment: jest.fn() } as any
})

it('stops $identify if current distinct_id is illegal', async () => {
const illegalIds = Array.from(ILLEGAL_IDS.values())
it.each(illegalIds)('stops $identify if current distinct_id is illegal: `%s`', async (illegalId: string) => {
pauldambra marked this conversation as resolved.
Show resolved Hide resolved
const person = await personState({
event: '$identify',
distinct_id: '[object Object]',
distinct_id: illegalId,
properties: {
$anon_distinct_id: 'anonymous_id',
},
Expand All @@ -1092,16 +1096,34 @@ describe('PersonState.update()', () => {
expect(persons.length).toEqual(0)

expect(hub.statsd!.increment).toHaveBeenCalledWith('illegal_distinct_ids.total', {
distinctId: '[object Object]',
distinctId: illegalId,
})
})

it('stops $identify if $anon_distinct_id is illegal: ``', async () => {
const person = await personState({
event: '$identify',
distinct_id: 'some_distinct_id',
properties: {
$anon_distinct_id: '',
},
}).handleIdentifyOrAlias()

expect(person).toEqual(undefined)
const persons = await fetchPostgresPersonsH()
expect(persons.length).toEqual(0)

expect(hub.statsd!.increment).toHaveBeenCalledWith('illegal_distinct_ids.total', {
distinctId: '',
})
})

it('stops $identify if $anon_distinct_id is illegal', async () => {
it.each(illegalIds)('stops $identify if $anon_distinct_id is illegal: `%s`', async (illegalId: string) => {
pauldambra marked this conversation as resolved.
Show resolved Hide resolved
const person = await personState({
event: '$identify',
distinct_id: 'some_distinct_id',
properties: {
$anon_distinct_id: 'undefined',
$anon_distinct_id: illegalId,
},
}).handleIdentifyOrAlias()

Expand All @@ -1110,7 +1132,7 @@ describe('PersonState.update()', () => {
expect(persons.length).toEqual(0)

expect(hub.statsd!.increment).toHaveBeenCalledWith('illegal_distinct_ids.total', {
distinctId: 'undefined',
distinctId: illegalId,
})
})

Expand Down