From 3c459e44bf7cabd1e531c4a17a6a98092568f657 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Thu, 19 Dec 2024 14:57:55 +0000 Subject: [PATCH 1/2] Add timestamp utils and uuidv7 code --- plugin-server/src/utils/utils.ts | 47 ++++++++++ .../src/worker/ingestion/timestamps.ts | 27 ++++++ plugin-server/tests/utils.test.ts | 23 +++++ .../tests/worker/ingestion/timestamps.test.ts | 91 ++++++++++++++++++- 4 files changed, 187 insertions(+), 1 deletion(-) diff --git a/plugin-server/src/utils/utils.ts b/plugin-server/src/utils/utils.ts index ccaf793c21c93..afce3ee1766fc 100644 --- a/plugin-server/src/utils/utils.ts +++ b/plugin-server/src/utils/utils.ts @@ -211,6 +211,53 @@ export class UUIDT extends UUID { } } +export class UUID7 extends UUID { + constructor(bufferOrUnixTimeMs?: number | Buffer, rand?: Buffer) { + if (bufferOrUnixTimeMs instanceof Buffer) { + if (bufferOrUnixTimeMs.length !== 16) { + throw new Error(`UUID7 from buffer requires 16 bytes, got ${bufferOrUnixTimeMs.length}`) + } + super(bufferOrUnixTimeMs) + return + } + const unixTimeMs = bufferOrUnixTimeMs ?? 
DateTime.utc().toMillis() + let unixTimeMsBig = BigInt(unixTimeMs) + + if (!rand) { + rand = randomBytes(10) + } else if (rand.length !== 10) { + throw new Error(`UUID7 requires 10 bytes of random data, got ${rand.length}`) + } + + // see https://www.rfc-editor.org/rfc/rfc9562#name-uuid-version-7 + // a UUIDv7 is 128 bits (16 bytes) total + // 48 bits for unix_ts_ms, + // 4 bits for ver = 0b111 (7) + // 12 bits for rand_a + // 2 bits for var = 0b10 + // 62 bits for rand_b + // we set fully random values for rand_a and rand_b + + const array = new Uint8Array(16) + // 48 bits for time, WILL FAIL in 10 895 CE + // XXXXXXXX-XXXX-****-****-************ + for (let i = 5; i >= 0; i--) { + array[i] = Number(unixTimeMsBig & 0xffn) // use last 8 binary digits to set UUID 2 hexadecimal digits + unixTimeMsBig >>= 8n // remove these last 8 binary digits + } + // rand_a and rand_b + // ********-****-*XXX-XXXX-XXXXXXXXXXXX + array.set(rand, 6) + + // ver and var + // ********-****-7***-X***-************ + array[6] = 0b0111_0000 | (array[6] & 0b0000_1111) + array[8] = 0b1000_0000 | (array[8] & 0b0011_1111) + + super(array) + } +} + /* Format timestamps. Allowed timestamp formats support ISO and ClickHouse formats according to `timestampFormat`. 
This distinction is relevant because ClickHouse does NOT diff --git a/plugin-server/src/worker/ingestion/timestamps.ts b/plugin-server/src/worker/ingestion/timestamps.ts index bf1e82f4dffdf..c41a5e33757ea 100644 --- a/plugin-server/src/worker/ingestion/timestamps.ts +++ b/plugin-server/src/worker/ingestion/timestamps.ts @@ -119,3 +119,30 @@ export function parseDate(supposedIsoString: string): DateTime { } return DateTime.fromJSDate(jsDate).toUTC() } + +export function toYearMonthDayInTimezone( + timestamp: number, + timeZone: string +): { year: number; month: number; day: number } { + const parts = new Intl.DateTimeFormat('en', { + timeZone, + year: 'numeric', + month: '2-digit', + day: '2-digit', + }).formatToParts(new Date(timestamp)) + const year = parts.find((part) => part.type === 'year')?.value + const month = parts.find((part) => part.type === 'month')?.value + const day = parts.find((part) => part.type === 'day')?.value + if (!year || !month || !day) { + throw new Error('Failed to get year, month, or day') + } + return { year: Number(year), month: Number(month), day: Number(day) } +} + +export function toStartOfDayInTimezone(timestamp: number, timeZone: string): Date { + const { year, month, day } = toYearMonthDayInTimezone(timestamp, timeZone) + return DateTime.fromObject( + { year, month, day, hour: 0, minute: 0, second: 0, millisecond: 0 }, + { zone: timeZone } + ).toJSDate() +} diff --git a/plugin-server/tests/utils.test.ts b/plugin-server/tests/utils.test.ts index 9fd37dd5b12fd..5f2be53ebb846 100644 --- a/plugin-server/tests/utils.test.ts +++ b/plugin-server/tests/utils.test.ts @@ -13,6 +13,7 @@ import { sanitizeSqlIdentifier, stringify, UUID, + UUID7, UUIDT, } from '../src/utils/utils' @@ -124,6 +125,28 @@ describe('utils', () => { }) }) + describe('UUIDv7', () => { + it('is well-formed', () => { + const uuid7 = new UUID7() + const uuid7String = uuid7.toString() + // UTC timestamp matching (roughly, only comparing the beginning as the timestamp's end 
inevitably drifts away) + expect(uuid7String.slice(0, 8)).toEqual(Date.now().toString(16).padStart(12, '0').slice(0, 8)) + // version digit matching + expect(uuid7String[14]).toEqual('7') + // var matching + const variant = parseInt(uuid7String[19], 16) >>> 2 + expect(variant).toEqual(2) + }) + it('has the correct value when given a timestamp and random bytes', () => { + const timestamp = new Date('Wed, 30 Oct 2024 21:46:23 GMT').getTime() + const randomBytes = Buffer.from( + new Uint8Array([0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23]) + ) + const uuid7 = new UUID7(timestamp, randomBytes) + expect(uuid7.toString()).toEqual('0192df64-df98-7123-8567-89abcdef0123') + }) + }) + describe('sanitizeSqlIdentifier', () => { it('removes all characters that are neither letter, digit or underscore and adds quotes around identifier', () => { const rawIdentifier = 'some_field"; DROP TABLE actually_an_injection-9;' diff --git a/plugin-server/tests/worker/ingestion/timestamps.test.ts b/plugin-server/tests/worker/ingestion/timestamps.test.ts index a70844a349ae9..742a908aa87f3 100644 --- a/plugin-server/tests/worker/ingestion/timestamps.test.ts +++ b/plugin-server/tests/worker/ingestion/timestamps.test.ts @@ -1,7 +1,12 @@ import { PluginEvent } from '@posthog/plugin-scaffold' import { UUIDT } from '../../../src/utils/utils' -import { parseDate, parseEventTimestamp } from '../../../src/worker/ingestion/timestamps' +import { + parseDate, + parseEventTimestamp, + toStartOfDayInTimezone, + toYearMonthDayInTimezone, +} from '../../../src/worker/ingestion/timestamps' describe('parseDate()', () => { const timestamps = [ @@ -283,3 +288,87 @@ describe('parseEventTimestamp()', () => { expect(timestamp.toISO()).toEqual('2021-10-29T01:00:00.000Z') }) }) + +describe('toYearMonthDateInTimezone', () => { + it('returns the correct date in the correct timezone', () => { + expect(toYearMonthDayInTimezone(new Date('2024-12-13T10:00:00.000Z').getTime(), 'Europe/London')).toEqual({ + 
year: 2024, + month: 12, + day: 13, + }) + + // should be a day ahead due to time zones + expect(toYearMonthDayInTimezone(new Date('2024-12-13T23:00:00.000Z').getTime(), 'Asia/Tokyo')).toEqual({ + year: 2024, + month: 12, + day: 14, + }) + + // should be a day behind due to time zones + expect(toYearMonthDayInTimezone(new Date('2024-12-13T01:00:00.000Z').getTime(), 'America/Los_Angeles')).toEqual( + { + year: 2024, + month: 12, + day: 12, + } + ) + + // should be the same day due to no DST + expect(toYearMonthDayInTimezone(new Date('2024-12-13T00:00:00.000Z').getTime(), 'Europe/London')).toEqual({ + year: 2024, + month: 12, + day: 13, + }) + + // should be a different day due to DST (british summer time) + expect(toYearMonthDayInTimezone(new Date('2024-06-13T23:00:00.000Z').getTime(), 'Europe/London')).toEqual({ + year: 2024, + month: 6, + day: 14, + }) + }) + + it('should throw on invalid timezone', () => { + expect(() => toYearMonthDayInTimezone(new Date().getTime(), 'Invalid/Timezone')).toThrowError( + 'Invalid time zone' + ) + }) +}) + +describe('toStartOfDayInTimezone', () => { + it('returns the start of the day in the correct timezone', () => { + expect(toStartOfDayInTimezone(new Date('2024-12-13T10:00:00.000Z').getTime(), 'Europe/London')).toEqual( + new Date('2024-12-13T00:00:00Z') + ) + + // would be the following day in Asia/Tokyo, but should be the same day (just earlier) in UTC + expect(toStartOfDayInTimezone(new Date('2024-12-13T23:00:00.000Z').getTime(), 'Asia/Tokyo')).toEqual( + new Date('2024-12-13T15:00:00Z') + ) + + // would be the same day in Asia/Tokyo, but back in UTC time it should be the previous day (but later in the day) + expect(toStartOfDayInTimezone(new Date('2024-12-13T01:00:00.000Z').getTime(), 'Asia/Tokyo')).toEqual( + new Date('2024-12-12T15:00:00Z') + ) + + // would be the same day in America/Los_Angeles, but earlier in the day when converted to UTC + expect(toStartOfDayInTimezone(new Date('2024-12-13T23:00:00.000Z').getTime(), 
'America/Los_Angeles')).toEqual( + new Date('2024-12-13T08:00:00Z') + ) + + // would be the previous day in America/Los_Angeles, and when converted to UTC it should stay the previous day + expect(toStartOfDayInTimezone(new Date('2024-12-13T01:00:00.000Z').getTime(), 'America/Los_Angeles')).toEqual( + new Date('2024-12-12T08:00:00Z') + ) + + // should be the same day due to no DST + expect(toStartOfDayInTimezone(new Date('2024-12-13T00:00:00.000Z').getTime(), 'Europe/London')).toEqual( + new Date('2024-12-13T00:00:00Z') + ) + + // should be a different day due to DST (british summer time) + expect(toStartOfDayInTimezone(new Date('2024-06-13T00:00:00.000Z').getTime(), 'Europe/London')).toEqual( + new Date('2024-06-12T23:00:00Z') + ) + }) +}) From 817dee4a08d258dc200be9f7210746bb50183914 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Thu, 19 Dec 2024 15:47:11 +0000 Subject: [PATCH 2/2] Test loading uuid7 from buffer --- plugin-server/tests/utils.test.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/plugin-server/tests/utils.test.ts b/plugin-server/tests/utils.test.ts index 5f2be53ebb846..44d690d798d60 100644 --- a/plugin-server/tests/utils.test.ts +++ b/plugin-server/tests/utils.test.ts @@ -145,6 +145,11 @@ describe('utils', () => { const uuid7 = new UUID7(timestamp, randomBytes) expect(uuid7.toString()).toEqual('0192df64-df98-7123-8567-89abcdef0123') }) + it('can be loaded from a buffer', () => { + const str = '0192df64df987123856789abcdef0123' + const uuid = new UUID7(new Buffer(str, 'hex')) + expect(uuid.toString().replace(/-/g, '')).toEqual(str) + }) }) describe('sanitizeSqlIdentifier', () => {