From 4f50326aec0967ac5c3764bfb5ca1b9c7e051c93 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Mon, 22 Jan 2024 17:58:43 +0100 Subject: [PATCH] fix(taxonomy): don't convert numeric strings to numbers (#19774) --- .../functional_tests/definitions.test.ts | 4 +- .../property-definitions-auto-discovery.ts | 10 - .../tests/main/process-event.test.ts | 587 +++++++++--------- ...roperty-definitions-auto-discovery.test.ts | 6 +- .../property-definitions-manager.test.ts | 22 +- 5 files changed, 307 insertions(+), 322 deletions(-) diff --git a/plugin-server/functional_tests/definitions.test.ts b/plugin-server/functional_tests/definitions.test.ts index ac02c73e304ce..574fdf5d091ba 100644 --- a/plugin-server/functional_tests/definitions.test.ts +++ b/plugin-server/functional_tests/definitions.test.ts @@ -57,8 +57,8 @@ test.concurrent.each([[2], [2.1234], ['2'], ['2.1234']])( expect(propertyDefinitions).toContainEqual( expect.objectContaining({ name: 'property', - is_numerical: true, - property_type: 'Numeric', + is_numerical: typeof numberValue === 'number', + property_type: typeof numberValue === 'number' ? 'Numeric' : 'String', }) ) }) diff --git a/plugin-server/src/worker/ingestion/property-definitions-auto-discovery.ts b/plugin-server/src/worker/ingestion/property-definitions-auto-discovery.ts index 0ae6f60845883..3c1e88012cfeb 100644 --- a/plugin-server/src/worker/ingestion/property-definitions-auto-discovery.ts +++ b/plugin-server/src/worker/ingestion/property-definitions-auto-discovery.ts @@ -3,10 +3,6 @@ import { DateTimePropertyTypeFormat, PropertyType, UnixTimestampPropertyTypeForm // magic copied from https://stackoverflow.com/a/54930905 // allows candidate to be typed as any -export const isNumericString = (candidate: any): boolean => { - return !(candidate instanceof Array) && candidate - parseFloat(candidate) + 1 >= 0 -} - export const unixTimestampPropertyTypeFormatPatterns: Record = { UNIX_TIMESTAMP: /^\d{10}(\.\d*)?$/, UNIX_TIMESTAMP_MILLISECONDS: /^\d{13}$/, @@ -106,18 +102,12 @@ export const detectPropertyDefinitionTypes = (value: unknown, key: string): Prop if (typeof value === 'string') { propertyType = PropertyType.String - if (isNumericString(value)) { - propertyType = PropertyType.Numeric - } - Object.values(dateTimePropertyTypeFormatPatterns).find((pattern) => { if (value.match(pattern)) { propertyType = PropertyType.DateTime return true } }) - - detectUnixTimestamps() } if ( diff --git a/plugin-server/tests/main/process-event.test.ts b/plugin-server/tests/main/process-event.test.ts index f7021a9d48c5e..71432287879c3 100644 --- a/plugin-server/tests/main/process-event.test.ts +++ b/plugin-server/tests/main/process-event.test.ts @@ -529,298 +529,301 @@ test('capture new person', async () => { last_seen_at: expect.any(String), }, ]) - expect(await hub.db.fetchPropertyDefinitions()).toEqual( - expect.arrayContaining([ - { - id: expect.any(String), - is_numerical: true, - name: 'distinct_id', - property_type: 'Numeric', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: 'token', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$browser', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$current_url', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$os', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: true, - name: '$browser_version', - property_type: 'Numeric', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$referring_domain', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$referrer', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: 'utm_medium', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: 'gclid', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: 'msclkid', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$ip', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 1, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: 'utm_medium', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: 'gclid', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: 'msclkid', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$initial_browser', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$initial_current_url', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$initial_os', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: true, - name: '$initial_browser_version', - property_type: 'Numeric', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$initial_referring_domain', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$initial_referrer', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$initial_utm_medium', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$initial_gclid', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - { - id: expect.any(String), - is_numerical: false, - name: '$initial_msclkid', - property_type: 'String', - property_type_format: null, - query_usage_30_day: null, - team_id: 2, - type: 2, - group_type_index: null, - volume_30_day: null, - }, - ]) - ) + const received = await hub.db.fetchPropertyDefinitions() + const expected = [ + { + id: expect.any(String), + is_numerical: true, + name: 'distinct_id', + property_type: 'Numeric', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: 'token', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$browser', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$current_url', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$os', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$browser_version', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$referring_domain', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$referrer', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: 'utm_medium', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: 'gclid', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: 'msclkid', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$ip', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 1, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: 'utm_medium', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: 'gclid', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: 'msclkid', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$initial_browser', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$initial_current_url', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$initial_os', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$initial_browser_version', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$initial_referring_domain', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$initial_referrer', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$initial_utm_medium', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$initial_gclid', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + { + id: expect.any(String), + is_numerical: false, + name: '$initial_msclkid', + property_type: 'String', + property_type_format: null, + query_usage_30_day: null, + team_id: 2, + type: 2, + group_type_index: null, + volume_30_day: null, + }, + ] + for (const element of expected) { + // Looping in an array to make it easier to debug + expect(received).toEqual(expect.arrayContaining([element])) + } }) test('capture bad team', async () => { diff --git a/plugin-server/tests/worker/ingestion/property-definitions-auto-discovery.test.ts b/plugin-server/tests/worker/ingestion/property-definitions-auto-discovery.test.ts index 0afa336ef5291..85f63daa1de52 100644 --- a/plugin-server/tests/worker/ingestion/property-definitions-auto-discovery.test.ts +++ b/plugin-server/tests/worker/ingestion/property-definitions-auto-discovery.test.ts @@ -4,7 +4,7 @@ import { detectPropertyDefinitionTypes } from '../../../src/worker/ingestion/pro describe('property definitions auto discovery', () => { describe('can detect numbers', () => { it('can detect "10"', () => { - expect(detectPropertyDefinitionTypes('10', 'anything')).toEqual(PropertyType.Numeric) + expect(detectPropertyDefinitionTypes('10', 'anything')).toEqual(PropertyType.String) }) it('can detect 10', () => { @@ -23,10 +23,6 @@ describe('property definitions auto discovery', () => { expect(detectPropertyDefinitionTypes(1.23, 'anything')).toEqual(PropertyType.Numeric) }) - it('can detect decimals in strings', () => { - expect(detectPropertyDefinitionTypes('1.23', 'anything')).toEqual(PropertyType.Numeric) - }) - it('can detect version numbers as non numeric', () => { expect(detectPropertyDefinitionTypes('1.2.3', 'anything')).toEqual(PropertyType.String) expect(detectPropertyDefinitionTypes('9.7.0', '$app_version')).toEqual(PropertyType.String) diff --git a/plugin-server/tests/worker/ingestion/property-definitions-manager.test.ts b/plugin-server/tests/worker/ingestion/property-definitions-manager.test.ts index 90c9941e0c403..dc5ed8d3ff988 100644 --- a/plugin-server/tests/worker/ingestion/property-definitions-manager.test.ts +++ b/plugin-server/tests/worker/ingestion/property-definitions-manager.test.ts @@ -7,10 +7,7 @@ import { PostgresUse } from '../../../src/utils/db/postgres' import { posthog } from '../../../src/utils/posthog' import { UUIDT } from '../../../src/utils/utils' import { GroupTypeManager } from '../../../src/worker/ingestion/group-type-manager' -import { - dateTimePropertyTypeFormatPatterns, - isNumericString, -} from '../../../src/worker/ingestion/property-definitions-auto-discovery' +import { dateTimePropertyTypeFormatPatterns } from '../../../src/worker/ingestion/property-definitions-auto-discovery' import { NULL_AFTER_PROPERTY_TYPE_DETECTION } from '../../../src/worker/ingestion/property-definitions-cache' import { PropertyDefinitionsManager } from '../../../src/worker/ingestion/property-definitions-manager' import { createOrganization, createOrganizationMembership, createTeam, createUser } from '../../helpers/sql' @@ -516,20 +513,19 @@ describe('PropertyDefinitionsManager()', () => { ]) }) - it('identifies a numeric type sent as a string', async () => { + it('identifies a numeric type sent as a string... as a string', async () => { await manager.updateEventNamesAndProperties(teamId, 'another_test_event', { some_number: String(randomInteger()), }) - expect(manager.propertyDefinitionsCache.get(teamId)?.peek('1some_number')).toEqual('Numeric') + expect(manager.propertyDefinitionsCache.get(teamId)?.peek('1some_number')).toEqual('String') expect(await hub.db.fetchPropertyDefinitions(teamId)).toEqual([ expect.objectContaining({ id: expect.any(String), team_id: teamId, name: 'some_number', - is_numerical: true, - property_type: 'Numeric', + property_type: 'String', }), ]) }) @@ -574,17 +570,17 @@ describe('PropertyDefinitionsManager()', () => { { propertyKey: 'unix timestamp with fractional seconds as a string', date: '1234567890.123', - expectedPropertyType: PropertyType.DateTime, + expectedPropertyType: PropertyType.String, }, { propertyKey: 'unix timestamp with five decimal places of fractional seconds as a string', date: '1234567890.12345', - expectedPropertyType: PropertyType.DateTime, + expectedPropertyType: PropertyType.String, }, { propertyKey: 'unix timestamp as a string', date: '1234567890', - expectedPropertyType: PropertyType.DateTime, + expectedPropertyType: PropertyType.String, }, { propertyKey: 'unix timestamp in milliseconds as a number', @@ -594,7 +590,7 @@ describe('PropertyDefinitionsManager()', () => { { propertyKey: 'unix timestamp in milliseconds as a string', date: '1234567890123', - expectedPropertyType: PropertyType.DateTime, + expectedPropertyType: PropertyType.String, }, ].flatMap((testcase) => { const toEdit = testcase @@ -607,7 +603,7 @@ describe('PropertyDefinitionsManager()', () => { const toNotMatch = { ...toEdit, propertyKey: toEdit.propertyKey.replace('timestamp', 'as a string'), - expectedPropertyType: isNumericString(toEdit.date) ? PropertyType.Numeric : PropertyType.String, + expectedPropertyType: typeof toEdit.date === 'number' ? PropertyType.Numeric : PropertyType.String, } return [testcase, toMatchWithJustTimeInName, toNotMatch]