Skip to content

Commit

Permalink
feat(web-analytics): Update session taxonomy (#21177)
Browse files Browse the repository at this point in the history
* Update session taxonomy

* Add missing descriptions

* Abstract `SESSION_PROPERTIES_ADAPTED_FROM_EVENT` away

* Add ad ids, and copy appropriate session properties from person properties

* Add test that all campaign properties have a definition

* Fix descriptions of session initial properties

---------

Co-authored-by: Michael Matloka <[email protected]>
  • Loading branch information
robbie-c and Twixes authored Mar 28, 2024
1 parent df6c124 commit 24e8176
Show file tree
Hide file tree
Showing 2 changed files with 202 additions and 34 deletions.
41 changes: 41 additions & 0 deletions frontend/src/lib/taxonomy.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import {
CAMPAIGN_PROPERTIES,
CORE_FILTER_DEFINITIONS_BY_GROUP,
SESSION_INITIAL_PROPERTIES_ADAPTED_FROM_EVENTS,
} from 'lib/taxonomy'

import { CoreFilterDefinition } from '~/types'

// Sanity checks for the derived taxonomy groups: the person and session groups are generated
// from the event property definitions, so these tests guard against a property being listed
// for derivation without a matching definition.
describe('taxonomy', () => {
    describe('event properties', () => {
        it('should have definitions for all campaign properties', () => {
            const eventProperties = Object.keys(CORE_FILTER_DEFINITIONS_BY_GROUP.event_properties)
            for (const property of CAMPAIGN_PROPERTIES) {
                expect(eventProperties).toContain(property)
            }
        })
    })

    describe('person properties', () => {
        // check that initial properties have been set up correctly
        it('should have an $initial_referring_domain property', () => {
            const property: CoreFilterDefinition =
                CORE_FILTER_DEFINITIONS_BY_GROUP.person_properties['$initial_referring_domain']
            expect(property.label).toEqual('Initial Referring Domain')
        })
    })

    describe('session properties', () => {
        const sessionPropertyNames = Object.keys(CORE_FILTER_DEFINITIONS_BY_GROUP.sessions)
        it('should have an $initial_referring_domain property', () => {
            const property: CoreFilterDefinition =
                CORE_FILTER_DEFINITIONS_BY_GROUP.sessions['$initial_referring_domain']
            expect(property.label).toEqual('Initial Referring Domain')
        })
        // Title names the set that is actually iterated below
        it('should have an $initial_ property for every entry in SESSION_INITIAL_PROPERTIES_ADAPTED_FROM_EVENTS', () => {
            for (const property of Array.from(SESSION_INITIAL_PROPERTIES_ADAPTED_FROM_EVENTS.keys())) {
                // Strip only a leading `$`, matching how taxonomy.tsx builds the `$initial_` keys
                expect(sessionPropertyNames).toContain('$initial_' + property.replace(/^\$/, ''))
            }
        })
    })
})
195 changes: 161 additions & 34 deletions frontend/src/lib/taxonomy.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,31 @@ import { CoreFilterDefinition, PropertyFilterValue } from '~/types'
import { TaxonomicFilterGroupType } from './components/TaxonomicFilter/types'
import { Link } from './lemon-ui/Link'

/** Same as https://github.com/PostHog/posthog-js/blob/master/src/utils/event-utils.ts */
// Ideally this would be imported from posthog-js, we just need to start exporting the list there
export const CAMPAIGN_PROPERTIES: string[] = [
    // UTM parameters
    'utm_source',
    'utm_medium',
    'utm_campaign',
    'utm_content',
    'utm_term',
    // Google: ads, ads, ads 360, display ads, ads web-to-app, ads app-to-web
    'gclid',
    'gad_source',
    'gclsrc',
    'dclid',
    'gbraid',
    'wbraid',
    // Other ad platforms
    'fbclid', // Facebook
    'msclkid', // Microsoft
    'twclid', // Twitter
    'li_fat_id', // LinkedIn
    'mc_cid', // Mailchimp campaign id
    'igshid', // Instagram
    'ttclid', // TikTok
]

// copy from https://github.com/PostHog/posthog/blob/29ac8d6b2ba5de4b65a148136b681b8e52e20429/plugin-server/src/utils/db/utils.ts#L44
const eventToPersonProperties = new Set([
const PERSON_PROPERTIES_ADAPTED_FROM_EVENT = new Set([
// mobile params
'$app_build',
'$app_name',
Expand All @@ -20,19 +43,29 @@ const eventToPersonProperties = new Set([
'$os_version',
'$referring_domain',
'$referrer',
// campaign params - automatically added by posthog-js here https://github.com/PostHog/posthog-js/blob/master/src/utils/event-utils.ts
...CAMPAIGN_PROPERTIES,
])

/**
 * Event property keys that get an `$initial_…` counterpart in the `sessions` taxonomy group.
 * Each key listed here must have a definition in `event_properties`, since the session
 * definition is copied from it.
 */
export const SESSION_INITIAL_PROPERTIES_ADAPTED_FROM_EVENTS = new Set([
    '$referring_domain',
    'utm_source',
    'utm_medium',
    'utm_campaign',
    'utm_content',
    // NOTE(review): `utm_name` is not part of CAMPAIGN_PROPERTIES - confirm it is still wanted here
    'utm_name',
    'utm_term',
    'gclid',
    'gad_source',
    'gclsrc',
    'dclid',
    'gbraid',
    'wbraid',
    'fbclid',
    'msclkid',
    'twclid',
    'li_fat_id',
    'mc_cid',
    'igshid',
    'ttclid',
])

// If adding event properties with labels, check whether they should be added to
Expand Down Expand Up @@ -905,6 +938,58 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = {
description: 'The previous build number for the app',
examples: ['1'],
},
// Ad-platform click/campaign identifiers. These are the non-UTM keys of CAMPAIGN_PROPERTIES;
// each of those keys needs a definition in this group. Labels are the raw parameter names.
gclid: {
label: 'gclid',
description: 'Google Click ID',
},
gad_source: {
label: 'gad_source',
description: 'Google Ads Source',
},
gclsrc: {
label: 'gclsrc',
description: 'Google Click Source',
},
dclid: {
label: 'dclid',
description: 'DoubleClick ID',
},
gbraid: {
label: 'gbraid',
description: 'Google Ads, web to app',
},
wbraid: {
label: 'wbraid',
description: 'Google Ads, app to web',
},
fbclid: {
label: 'fbclid',
description: 'Facebook Click ID',
},
msclkid: {
label: 'msclkid',
description: 'Microsoft Click ID',
},
twclid: {
label: 'twclid',
description: 'Twitter Click ID',
},
li_fat_id: {
label: 'li_fat_id',
description: 'LinkedIn First-Party Ad Tracking ID',
},
mc_cid: {
label: 'mc_cid',
description: 'Mailchimp Campaign ID',
},
igshid: {
label: 'igshid',
description: 'Instagram Share ID',
},
ttclid: {
label: 'ttclid',
description: 'TikTok Click ID',
},
},
numerical_event_properties: {}, // Same as event properties, see assignment below
person_properties: {}, // Currently person properties are the same as event properties, see assignment below
Expand All @@ -921,6 +1006,41 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = {
),
examples: ['01:04:12'],
},
// Session boundaries: timestamps of the first and last event in the session.
// The timestamp examples are produced by `new Date().toISOString()` inside this literal,
// so they reflect the moment the literal is evaluated rather than a fixed sample value.
$min_timestamp: {
label: 'First timestamp',
description: <span>The timestamp of the first event from this session.</span>,
examples: [new Date().toISOString()],
},
$max_timestamp: {
label: 'Last timestamp',
description: <span>The timestamp of the last event from this session</span>,
examples: [new Date().toISOString()],
},
// First URL visited in the session
$entry_url: {
label: 'Entry URL',
description: <span>The first URL visited in this session</span>,
examples: ['https://example.com/interesting-article?parameter=true'],
},
// Last URL visited in the session (counterpart of `$entry_url`)
$exit_url: {
    label: 'Exit URL',
    description: <span>The last URL visited in this session</span>,
    examples: ['https://example.com/interesting-article?parameter=true'],
},
// Per-session event counters; examples are given as strings.
$pageview_count: {
label: 'Pageview count',
description: <span>The number of page view events in this session</span>,
examples: ['123'],
},
$autocapture_count: {
label: 'Autocapture count',
description: <span>The number of autocapture events in this session</span>,
examples: ['123'],
},
// Defined explicitly here; it is not one of the keys listed in
// SESSION_INITIAL_PROPERTIES_ADAPTED_FROM_EVENTS, so it is not generated from an event property.
$initial_channel_type: {
label: 'Channel type',
description: <span>What type of acquisition channel this traffic came from.</span>,
examples: ['Paid Search', 'Organic Video', 'Direct'],
},
},
groups: {
$group_key: {
Expand All @@ -933,36 +1053,43 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = {
// `numerical_event_properties` is assigned the very same object as `event_properties` (a shared
// reference, not a copy), so keys added to `event_properties` afterwards show up in both groups.
CORE_FILTER_DEFINITIONS_BY_GROUP.numerical_event_properties = CORE_FILTER_DEFINITIONS_BY_GROUP.event_properties
// add distinct_id to event properties before copying to person properties so it exists in person properties as well
CORE_FILTER_DEFINITIONS_BY_GROUP.event_properties.distinct_id = CORE_FILTER_DEFINITIONS_BY_GROUP.metadata.distinct_id
// Build the person and session property definitions from the event property definitions.
//
// Person properties:
//   - keys in PERSON_PROPERTIES_ADAPTED_FROM_EVENT, or starting with `$geoip_`, produce two entries:
//     the key itself (labelled "Latest …") and an `$initial_…` variant (labelled "Initial …")
//   - every other event property is reused unchanged (same object reference)
// Session properties:
//   - keys in SESSION_INITIAL_PROPERTIES_ADAPTED_FROM_EVENTS produce an `$initial_…` entry
//
// The previous `Object.fromEntries(...)` assignment to `person_properties` was removed: its result
// was overwritten by the reset below before anything read it.
CORE_FILTER_DEFINITIONS_BY_GROUP.person_properties = {}

for (const [key, value] of Object.entries(CORE_FILTER_DEFINITIONS_BY_GROUP.event_properties)) {
    // e.g. `$referring_domain` -> `$initial_referring_domain`, `utm_source` -> `$initial_utm_source`
    const initialKey = `$initial_${key.replace(/^\$/, '')}`
    // Appends the provenance note to the event property's description, or uses the note alone
    const describeWith = (note: string): string =>
        'description' in value ? `${value.description} ${note}` : note

    if (PERSON_PROPERTIES_ADAPTED_FROM_EVENT.has(key) || key.startsWith('$geoip_')) {
        CORE_FILTER_DEFINITIONS_BY_GROUP.person_properties[key] = {
            ...value,
            label: `Latest ${value.label}`,
            description: describeWith('Data from the last time this user was seen.'),
        }

        CORE_FILTER_DEFINITIONS_BY_GROUP.person_properties[initialKey] = {
            ...value,
            label: `Initial ${value.label}`,
            description: describeWith('Data from the first time this user was seen.'),
        }
    } else {
        CORE_FILTER_DEFINITIONS_BY_GROUP.person_properties[key] = value
    }

    // Independent of the person branch above: a key may be in both sets
    if (SESSION_INITIAL_PROPERTIES_ADAPTED_FROM_EVENTS.has(key)) {
        CORE_FILTER_DEFINITIONS_BY_GROUP.sessions[initialKey] = {
            ...value,
            label: `Initial ${value.label}`,
            description: describeWith('Data from the first event in this session.'),
        }
    }
}

// We treat `$session_duration` as an event property in the context of series `math`, but it's fake in a sense
// This assignment comes after the person/session derivation that iterates `event_properties`,
// so that derivation never sees the `$session_duration` key.
CORE_FILTER_DEFINITIONS_BY_GROUP.event_properties.$session_duration =
CORE_FILTER_DEFINITIONS_BY_GROUP.sessions.$session_duration
Expand Down

0 comments on commit 24e8176

Please sign in to comment.