From b69188dd1cc7d30a5f25f790afc1f757b14df0dd Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 27 Aug 2024 17:14:26 +0200 Subject: [PATCH 01/75] very broken refactor --- plugin-server/src/cdp/cdp-consumers.ts | 300 +++++++++---------------- plugin-server/src/cdp/hog-masker.ts | 17 +- plugin-server/src/cdp/types.ts | 34 ++- 3 files changed, 132 insertions(+), 219 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index fef401d472927..b60eb97322d27 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -6,7 +6,6 @@ import { Counter, Histogram } from 'prom-client' import { KAFKA_APP_METRICS_2, KAFKA_CDP_FUNCTION_CALLBACKS, - KAFKA_CDP_FUNCTION_OVERFLOW, KAFKA_EVENTS_JSON, KAFKA_EVENTS_PLUGIN_INGESTION, KAFKA_LOG_ENTRIES, @@ -20,7 +19,7 @@ import { AppMetric2Type, Hub, RawClickHouseEvent, TeamId, TimestampFormat } from import { KafkaProducerWrapper } from '../utils/db/kafka-producer-wrapper' import { captureTeamEvent } from '../utils/posthog' import { status } from '../utils/status' -import { castTimestampOrNow } from '../utils/utils' +import { castTimestampOrNow, UUIDT } from '../utils/utils' import { RustyHook } from '../worker/rusty-hook' import { AsyncFunctionExecutor } from './async-function-executor' import { GroupsManager } from './groups-manager' @@ -30,15 +29,14 @@ import { HogMasker } from './hog-masker' import { HogWatcher, HogWatcherState } from './hog-watcher' import { CdpRedis, createCdpRedisPool } from './redis' import { - CdpOverflowMessage, HogFunctionAsyncFunctionResponse, HogFunctionInvocation, HogFunctionInvocationAsyncRequest, HogFunctionInvocationAsyncResponse, HogFunctionInvocationGlobals, HogFunctionInvocationResult, + HogFunctionInvocationSerialized, HogFunctionMessageToProduce, - HogFunctionOverflowedGlobals, HogFunctionType, } from './types' import { @@ -223,6 +221,51 @@ abstract class CdpConsumerBase { }) } + protected createInvocation( + globals: HogFunctionInvocationGlobals, + hogFunction: HogFunctionType + ): HogFunctionInvocation { + // Add the source of the trigger to the globals + const modifiedGlobals: HogFunctionInvocationGlobals = { + ...globals, + source: { + name: hogFunction.name ?? `Hog function: ${hogFunction.id}`, + url: `${globals.project.url}/pipeline/destinations/hog-${hogFunction.id}/configuration/`, + }, + } + + return { + id: new UUIDT().toString(), + globals: modifiedGlobals, + team_id: hogFunction.team_id, + hogFunction, + queue: 'hog', + timings: [], + } + } + + protected async queueInvocation(invocation: HogFunctionInvocation) { + // Depending on flags we enqueue either to kafka or to cyclotron + + // TODO: Add cylcotron check here + // For now we just enqueue to kafka + + // For kafka style this is overkill to enqueue this way but it simplifies migrating to the new system + + // TODO: Convert to the right format for a job + const request: HogFunctionInvocationSerialized = { + state: await gzipObject(invocation), + } + + // NOTE: This is very temporary as it is producing the response. the response will actually be produced by the 3rd party service + // Later this will actually be the _request_ which we will push to the async function topic if we make one + this.messagesToProduce.push({ + topic: KAFKA_CDP_FUNCTION_CALLBACKS, + value: request, + key: invocation.hogFunction.id, + }) + } + protected async processInvocationResults(results: HogFunctionInvocationResult[]): Promise { await runInstrumentedFunction({ statsKey: `cdpConsumer.handleEachBatch.produceResults`, @@ -232,8 +275,8 @@ abstract class CdpConsumerBase { // Tricky: We want to pull all the logs out as we don't want them to be passed around to any subsequent functions this.produceAppMetric({ - team_id: result.invocation.teamId, - app_source_id: result.invocation.hogFunctionId, + team_id: result.invocation.team.id, + app_source_id: result.invocation.hogFunction.id, metric_kind: result.error ? 'failure' : 'success', metric_name: result.error ? 'failed' : 'succeeded', count: 1, @@ -257,91 +300,39 @@ abstract class CdpConsumerBase { }) } - if (result.asyncFunctionRequest) { - const request: HogFunctionInvocationAsyncRequest = { - state: await gzipObject(result.invocation), - teamId: result.invocation.teamId, - hogFunctionId: result.invocation.hogFunctionId, - asyncFunctionRequest: result.asyncFunctionRequest, - } - const res = await this.runWithHeartbeat(() => this.asyncFunctionExecutor.execute(request)) - - // NOTE: This is very temporary as it is producing the response. the response will actually be produced by the 3rd party service - // Later this will actually be the _request_ which we will push to the async function topic if we make one - if (res) { - this.messagesToProduce.push({ - topic: KAFKA_CDP_FUNCTION_CALLBACKS, - value: res, - key: res.hogFunctionId, - }) - } - } - }) - ) - }, - }) - } - - protected async executeAsyncResponses( - asyncResponses: HogFunctionInvocationAsyncResponse[] - ): Promise { - return await runInstrumentedFunction({ - statsKey: `cdpConsumer.handleEachBatch.executeAsyncResponses`, - func: async () => { - asyncResponses.forEach((x) => { - counterAsyncFunctionResponse.inc({ - outcome: x.asyncFunctionResponse.error ? 'failed' : 'succeeded', - }) - }) - - const invocationsWithResponses: [HogFunctionInvocation, HogFunctionAsyncFunctionResponse][] = [] - - // Deserialize the compressed data - await Promise.all( - asyncResponses.map(async (item) => { - try { - const invocation = await unGzipObject(item.state) - invocationsWithResponses.push([invocation, item.asyncFunctionResponse]) - } catch (e) { - status.error('Error unzipping message', e, item.state) - captureException(e, { - extra: { hogFunctionId: item.hogFunctionId, teamId: item.teamId }, - }) + if (!result.finished) { + // If it isn't finished then we need to put it back on the queue + await this.queueInvocation(result.invocation) } }) ) - - const results = await this.runManyWithHeartbeat(invocationsWithResponses, (item) => - this.hogExecutor.executeAsyncResponse(...item) - ) - - await this.hogWatcher.observeResults(results) - return results }, }) } - protected async executeMatchingFunctions( + /** + * Finds all matching hog functions for the given globals. + * Filters them for their disabled state as well as masking configs + * + */ + protected async queueMatchingFunctions( invocationGlobals: HogFunctionInvocationGlobals[] - ): Promise { + ): Promise { return await runInstrumentedFunction({ - statsKey: `cdpConsumer.handleEachBatch.executeMatchingFunctions`, + statsKey: `cdpConsumer.handleEachBatch.queueMatchingFunctions`, func: async () => { - const possibleInvocations: { globals: HogFunctionInvocationGlobals; hogFunction: HogFunctionType }[] = - [] + const possibleInvocations: HogFunctionInvocation[] = [] // TODO: Add a helper to hog functions to determine if they require groups or not and then only load those await this.groupsManager.enrichGroups(invocationGlobals) // Find all functions that could need running invocationGlobals.forEach((globals) => { + // TODO: Move that out of finding to somewhere else const { matchingFunctions, nonMatchingFunctions } = this.hogExecutor.findMatchingFunctions(globals) possibleInvocations.push( - ...matchingFunctions.map((hogFunction) => ({ - globals, - hogFunction, - })) + ...matchingFunctions.map((hogFunction) => this.createInvocation(globals, hogFunction)) ) nonMatchingFunctions.forEach((item) => @@ -391,45 +382,23 @@ abstract class CdpConsumerBase { }) }) - const overflowGlobalsAndFunctions: Record = {} - - const notOverflowedInvocations = notMaskedInvocations.filter((item) => { - const state = states[item.hogFunction.id].state - - if (state === HogWatcherState.degraded) { - const key = `${item.globals.project.id}-${item.globals.event.uuid}` - overflowGlobalsAndFunctions[key] = overflowGlobalsAndFunctions[key] || { - globals: item.globals, - hogFunctionIds: [], - } - - overflowGlobalsAndFunctions[key].hogFunctionIds.push(item.hogFunction.id) - counterFunctionInvocation.inc({ outcome: 'overflowed' }, 1) - return false - } + // We create the invocation here instead of in the function... + await this.runManyWithHeartbeat(notMaskedInvocations, (item) => + console.log('TODO: Create invocation here') + ) - return true - }) + return notMaskedInvocations - Object.values(overflowGlobalsAndFunctions).forEach((item) => { - this.messagesToProduce.push({ - topic: KAFKA_CDP_FUNCTION_OVERFLOW, - value: { - source: 'event_invocations', - payload: item, - }, - key: item.globals.event.uuid, - }) - }) + // TODO: Option for routing to cyclotron instead of kafka + // TODO: Include "priority" in the job so that we can prioritize certain functions + // const results = ( + // await this.runManyWithHeartbeat(notMaskedInvocations, (item) => + // this.hogExecutor.executeFunction(item.globals, item.hogFunction) + // ) + // ).filter((x) => !!x) as HogFunctionInvocationResult[] - const results = ( - await this.runManyWithHeartbeat(notOverflowedInvocations, (item) => - this.hogExecutor.executeFunction(item.globals, item.hogFunction) - ) - ).filter((x) => !!x) as HogFunctionInvocationResult[] - - await this.hogWatcher.observeResults(results) - return results + // await this.hogWatcher.observeResults(results) + // return results }, }) } @@ -534,7 +503,7 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { return } - const invocationResults = await this.runWithHeartbeat(() => this.executeMatchingFunctions(invocationGlobals)) + const invocationResults = await this.runWithHeartbeat(() => this.queueMatchingFunctions(invocationGlobals)) await this.processInvocationResults(invocationResults) } @@ -592,88 +561,45 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { return } - const invocationResults = await this.runWithHeartbeat(() => this.executeAsyncResponses(events)) - - await this.processInvocationResults(invocationResults) - } - - private parseMessages(messages: Message[]): HogFunctionInvocationAsyncResponse[] { - const events: HogFunctionInvocationAsyncResponse[] = [] - messages.map((message) => { - try { - const event = JSON.parse(message.value!.toString()) - events.push(event as HogFunctionInvocationAsyncResponse) - } catch (e) { - status.error('Error parsing message', e) - } - }) + // TODO: This needs to handle the new kind of state schedulign and the old kind from rusty hook - return events - } -} - -/** - * This consumer handles overflow for both incoming events as well as callbacks. - * In the future we might want multiple consumers but for now this is fine. - */ - -export class CdpOverflowConsumer extends CdpConsumerBase { - protected name = 'CdpOverflowConsumer' - protected topic = KAFKA_CDP_FUNCTION_OVERFLOW - protected consumerGroupId = 'cdp-overflow-consumer' - - public async _handleEachBatch(messages: Message[]): Promise { - const overflowedGlobals = await this.runWithHeartbeat(() => - runInstrumentedFunction({ - statsKey: `cdpConsumer.handleEachBatch.parseKafkaMessages`, - func: () => Promise.resolve(this.parseMessages(messages)), - }) - ) - - const invocationResults = await this.executeOverflowedFunctions(overflowedGlobals) + const invocationResults = await this.runWithHeartbeat(() => this.executeAsyncResponses(events)) await this.processInvocationResults(invocationResults) } - protected async executeOverflowedFunctions( - invocationGlobals: HogFunctionOverflowedGlobals[] + protected async executeAsyncResponses( + asyncResponses: HogFunctionInvocationAsyncResponse[] ): Promise { return await runInstrumentedFunction({ - statsKey: `cdpConsumer.handleEachBatch.executeOverflowedFunctions`, + statsKey: `cdpConsumer.handleEachBatch.executeAsyncResponses`, func: async () => { - // TODO: Add a helper to hog functions to determine if they require groups or not and then only load those - await this.groupsManager.enrichGroups(invocationGlobals.map((x) => x.globals)) - - const invocations = invocationGlobals - .map((item) => - item.hogFunctionIds.map((hogFunctionId) => ({ - globals: item.globals, - hogFunctionId, - })) - ) - .flat() - - const states = await this.hogWatcher.getStates(invocationGlobals.map((x) => x.hogFunctionIds).flat()) - - const results = ( - await this.runManyWithHeartbeat(invocations, (item) => { - const state = states[item.hogFunctionId].state - if (state >= HogWatcherState.disabledForPeriod) { - this.produceAppMetric({ - team_id: item.globals.project.id, - app_source_id: item.hogFunctionId, - metric_kind: 'failure', - metric_name: - state === HogWatcherState.disabledForPeriod - ? 'disabled_temporarily' - : 'disabled_permanently', - count: 1, + asyncResponses.forEach((x) => { + counterAsyncFunctionResponse.inc({ + outcome: x.asyncFunctionResponse.error ? 'failed' : 'succeeded', + }) + }) + + const invocationsWithResponses: [HogFunctionInvocation, HogFunctionAsyncFunctionResponse][] = [] + + // Deserialize the compressed data + await Promise.all( + asyncResponses.map(async (item) => { + try { + const invocation = await unGzipObject(item.state) + invocationsWithResponses.push([invocation, item.asyncFunctionResponse]) + } catch (e) { + status.error('Error unzipping message', e, item.state) + captureException(e, { + extra: { hogFunctionId: item.hogFunctionId, teamId: item.teamId }, }) - return } - return this.hogExecutor.executeFunction(item.globals, item.hogFunctionId) }) - ).filter((x) => !!x) as HogFunctionInvocationResult[] + ) + + const results = await this.runManyWithHeartbeat(invocationsWithResponses, (item) => + this.hogExecutor.executeAsyncResponse(...item) + ) await this.hogWatcher.observeResults(results) return results @@ -681,22 +607,18 @@ export class CdpOverflowConsumer extends CdpConsumerBase { }) } - private parseMessages(messages: Message[]): HogFunctionOverflowedGlobals[] { - const invocationGlobals: HogFunctionOverflowedGlobals[] = [] + private parseMessages(messages: Message[]): HogFunctionInvocationAsyncResponse[] { + const events: HogFunctionInvocationAsyncResponse[] = [] messages.map((message) => { try { - const parsed = JSON.parse(message.value!.toString()) as CdpOverflowMessage - - if (parsed.source === 'event_invocations') { - invocationGlobals.push(parsed.payload) - } + const event = JSON.parse(message.value!.toString()) + events.push(event as HogFunctionInvocationAsyncResponse) } catch (e) { - // TODO: We probably want to crash here right as this means something went really wrong and needs investigating? status.error('Error parsing message', e) } }) - return invocationGlobals + return events } } diff --git a/plugin-server/src/cdp/hog-masker.ts b/plugin-server/src/cdp/hog-masker.ts index 8dcfa52ae4398..28066d5b77157 100644 --- a/plugin-server/src/cdp/hog-masker.ts +++ b/plugin-server/src/cdp/hog-masker.ts @@ -2,7 +2,7 @@ import { exec } from '@posthog/hogvm' import { createHash } from 'crypto' import { CdpRedis } from './redis' -import { HogFunctionInvocationGlobals, HogFunctionType } from './types' +import { HogFunctionInvocation } from './types' export const BASE_REDIS_KEY = process.env.NODE_ENV == 'test' ? '@posthog-test/hog-masker' : '@posthog/hog-masker' const REDIS_KEY_TOKENS = `${BASE_REDIS_KEY}/mask` @@ -20,12 +20,7 @@ type MaskContext = { threshold: number | null } -type HogInvocationContext = { - globals: HogFunctionInvocationGlobals - hogFunction: HogFunctionType -} - -type HogInvocationContextWithMasker = HogInvocationContext & { +type HogInvocationContextWithMasker = HogFunctionInvocation & { masker?: MaskContext } @@ -39,9 +34,9 @@ type HogInvocationContextWithMasker = HogInvocationContext & { export class HogMasker { constructor(private redis: CdpRedis) {} - public async filterByMasking(invocations: HogInvocationContext[]): Promise<{ - masked: HogInvocationContext[] - notMasked: HogInvocationContext[] + public async filterByMasking(invocations: HogFunctionInvocation[]): Promise<{ + masked: HogFunctionInvocation[] + notMasked: HogFunctionInvocation[] }> { const invocationsWithMasker: HogInvocationContextWithMasker[] = [...invocations] const masks: Record = {} @@ -124,7 +119,7 @@ export class HogMasker { } return acc }, - { masked: [], notMasked: [] } as { masked: HogInvocationContext[]; notMasked: HogInvocationContext[] } + { masked: [], notMasked: [] } as { masked: HogFunctionInvocation[]; notMasked: HogFunctionInvocation[] } ) } } diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index b4b8155d971f0..9583507d64e51 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -7,6 +7,7 @@ import { ElementPropertyFilter, EventPropertyFilter, PersonPropertyFilter, + Team, } from '../types' export type HogBytecode = any[] @@ -99,11 +100,6 @@ export type HogFunctionInvocationGlobalsWithInputs = HogFunctionInvocationGlobal inputs: Record } -export type HogFunctionOverflowedGlobals = { - hogFunctionIds: HogFunctionType['id'][] - globals: HogFunctionInvocationGlobals -} - export type HogFunctionFilterGlobals = { // Filter Hog is built in the same way as analytics so the global object is meant to be an event event: string @@ -157,12 +153,13 @@ export interface HogFunctionTiming { duration_ms: number } -// This is the "persistent" state of a hog function invocation export type HogFunctionInvocation = { id: string globals: HogFunctionInvocationGlobals - teamId: number - hogFunctionId: HogFunctionType['id'] + team_id: Team['id'] + hogFunction: HogFunctionType + queue: 'hog' | 'fetch' + queueParameters?: any // The current vmstate (set if the invocation is paused) vmState?: VMState timings: HogFunctionTiming[] @@ -190,7 +187,7 @@ export type HogFunctionInvocationResult = { invocation: HogFunctionInvocation finished: boolean error?: any - asyncFunctionRequest?: HogFunctionAsyncFunctionRequest + // asyncFunctionRequest?: HogFunctionAsyncFunctionRequest logs: LogEntry[] capturedPostHogEvents?: HogFunctionCapturedEvent[] } @@ -206,11 +203,13 @@ export type HogFunctionInvocationAsyncResponse = { state: string // Serialized HogFunctionInvocation teamId: number hogFunctionId: HogFunctionType['id'] - - // FOLLOWUP: do we want to type this more strictly? asyncFunctionResponse: HogFunctionAsyncFunctionResponse } +export type HogFunctionInvocationSerialized = { + state: string // Serialized HogFunctionInvocation +} + // Mostly copied from frontend types export type HogFunctionInputSchemaType = { type: 'string' | 'boolean' | 'dictionary' | 'choice' | 'json' | 'integration' | 'integration_field' @@ -259,16 +258,13 @@ export type IntegrationType = { created_by_id?: number } -type CdpOverflowMessageInvocations = { - source: 'event_invocations' - payload: HogFunctionOverflowedGlobals -} - -export type CdpOverflowMessage = CdpOverflowMessageInvocations - export type HogFunctionMessageToProduce = { topic: string - value: CdpOverflowMessage | HogFunctionLogEntrySerialized | HogFunctionInvocationAsyncResponse | AppMetric2Type + value: + | HogFunctionLogEntrySerialized + | HogFunctionInvocationAsyncResponse + | AppMetric2Type + | HogFunctionInvocationSerialized key: string } From ee3166a84cff63704a2579403752e2af05686a87 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 29 Aug 2024 14:11:29 +0200 Subject: [PATCH 02/75] Refactor --- frontend/src/scenes/userLogic.ts | 1 + plugin-server/src/cdp/cdp-consumers.ts | 641 ++++++++++++------------- plugin-server/src/cdp/hog-executor.ts | 212 ++++---- plugin-server/src/cdp/types.ts | 2 +- 4 files changed, 402 insertions(+), 454 deletions(-) diff --git a/frontend/src/scenes/userLogic.ts b/frontend/src/scenes/userLogic.ts index 9db1e96fa8806..a15f205a46064 100644 --- a/frontend/src/scenes/userLogic.ts +++ b/frontend/src/scenes/userLogic.ts @@ -131,6 +131,7 @@ export const userLogic = kea([ posthog.register({ is_demo_project: user.team?.is_demo, + is_impersonated: user.is_impersonated, }) if (user.team) { diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index b60eb97322d27..de3a4e75d1d5f 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -1,6 +1,6 @@ import cyclotron from '@posthog/cyclotron' import { captureException } from '@sentry/node' -import { features, librdkafkaVersion, Message } from 'node-rdkafka' +import { Message } from 'node-rdkafka' import { Counter, Histogram } from 'prom-client' import { @@ -11,11 +11,11 @@ import { KAFKA_LOG_ENTRIES, } from '../config/kafka-topics' import { BatchConsumer, startBatchConsumer } from '../kafka/batch-consumer' -import { createRdConnectionConfigFromEnvVars, createRdProducerConfigFromEnvVars } from '../kafka/config' -import { createKafkaProducer } from '../kafka/producer' +import { createRdConnectionConfigFromEnvVars } from '../kafka/config' import { addSentryBreadcrumbsEventListeners } from '../main/ingestion-queues/kafka-metrics' import { runInstrumentedFunction } from '../main/utils' import { AppMetric2Type, Hub, RawClickHouseEvent, TeamId, TimestampFormat } from '../types' +import { createKafkaProducerWrapper } from '../utils/db/hub' import { KafkaProducerWrapper } from '../utils/db/kafka-producer-wrapper' import { captureTeamEvent } from '../utils/posthog' import { status } from '../utils/status' @@ -29,9 +29,7 @@ import { HogMasker } from './hog-masker' import { HogWatcher, HogWatcherState } from './hog-watcher' import { CdpRedis, createCdpRedisPool } from './redis' import { - HogFunctionAsyncFunctionResponse, HogFunctionInvocation, - HogFunctionInvocationAsyncRequest, HogFunctionInvocationAsyncResponse, HogFunctionInvocationGlobals, HogFunctionInvocationResult, @@ -68,17 +66,31 @@ const counterFunctionInvocation = new Counter({ labelNames: ['outcome'], // One of 'failed', 'succeeded', 'overflowed', 'disabled', 'filtered' }) -const counterAsyncFunctionResponse = new Counter({ - name: 'cdp_async_function_response', - help: 'An async function response was received with an outcome', - labelNames: ['outcome'], // One of 'failed', 'succeeded', 'overflowed', 'disabled', 'filtered' -}) - export interface TeamIDWithConfig { teamId: TeamId | null consoleLogIngestionEnabled: boolean } +function createInvocation(globals: HogFunctionInvocationGlobals, hogFunction: HogFunctionType): HogFunctionInvocation { + // Add the source of the trigger to the globals + const modifiedGlobals: HogFunctionInvocationGlobals = { + ...globals, + source: { + name: hogFunction.name ?? `Hog function: ${hogFunction.id}`, + url: `${globals.project.url}/pipeline/destinations/hog-${hogFunction.id}/configuration/`, + }, + } + + return { + id: new UUIDT().toString(), + globals: modifiedGlobals, + team_id: hogFunction.team_id, + hogFunction, + queue: 'hog', + timings: [], + } +} + abstract class CdpConsumerBase { batchConsumer?: BatchConsumer hogFunctionManager: HogFunctionManager @@ -152,29 +164,9 @@ abstract class CdpConsumerBase { return results } - public async handleEachBatch(messages: Message[], heartbeat: () => void): Promise { - status.info('🔁', `${this.name} - handling batch`, { - size: messages.length, - }) - - this.heartbeat = heartbeat - - histogramKafkaBatchSize.observe(messages.length) - histogramKafkaBatchSizeKb.observe(messages.reduce((acc, m) => (m.value?.length ?? 0) + acc, 0) / 1024) - - return await runInstrumentedFunction({ - statsKey: `cdpConsumer.handleEachBatch`, - sendTimeoutGuardToSentry: false, - func: async () => { - await this._handleEachBatch(messages) - await this.produceQueuedMessages() - }, - }) - } - - protected abstract _handleEachBatch(messages: Message[]): Promise + protected abstract _handleKafkaBatch(messages: Message[]): Promise - private async produceQueuedMessages() { + protected async produceQueuedMessages() { const messages = [...this.messagesToProduce] this.messagesToProduce = [] await Promise.all( @@ -221,27 +213,12 @@ abstract class CdpConsumerBase { }) } - protected createInvocation( - globals: HogFunctionInvocationGlobals, - hogFunction: HogFunctionType - ): HogFunctionInvocation { - // Add the source of the trigger to the globals - const modifiedGlobals: HogFunctionInvocationGlobals = { - ...globals, - source: { - name: hogFunction.name ?? `Hog function: ${hogFunction.id}`, - url: `${globals.project.url}/pipeline/destinations/hog-${hogFunction.id}/configuration/`, - }, - } - - return { - id: new UUIDT().toString(), - globals: modifiedGlobals, - team_id: hogFunction.team_id, - hogFunction, - queue: 'hog', - timings: [], - } + protected async queueInvocations(invocation: HogFunctionInvocation[]) { + await Promise.all( + invocation.map(async (item) => { + await this.queueInvocation(item) + }) + ) } protected async queueInvocation(invocation: HogFunctionInvocation) { @@ -266,56 +243,120 @@ abstract class CdpConsumerBase { }) } - protected async processInvocationResults(results: HogFunctionInvocationResult[]): Promise { - await runInstrumentedFunction({ - statsKey: `cdpConsumer.handleEachBatch.produceResults`, - func: async () => { - await Promise.all( - results.map(async (result) => { - // Tricky: We want to pull all the logs out as we don't want them to be passed around to any subsequent functions + protected async startKafkaConsumer() { + this.batchConsumer = await startBatchConsumer({ + connectionConfig: createRdConnectionConfigFromEnvVars(this.hub), + groupId: this.consumerGroupId, + topic: this.topic, + autoCommit: true, + sessionTimeout: this.hub.KAFKA_CONSUMPTION_SESSION_TIMEOUT_MS, + maxPollIntervalMs: this.hub.KAFKA_CONSUMPTION_MAX_POLL_INTERVAL_MS, + // the largest size of a message that can be fetched by the consumer. + // the largest size our MSK cluster allows is 20MB + // we only use 9 or 10MB but there's no reason to limit this 🤷️ + consumerMaxBytes: this.hub.KAFKA_CONSUMPTION_MAX_BYTES, + consumerMaxBytesPerPartition: this.hub.KAFKA_CONSUMPTION_MAX_BYTES_PER_PARTITION, + // our messages are very big, so we don't want to buffer too many + // queuedMinMessages: this.hub.KAFKA_QUEUE_SIZE, + consumerMaxWaitMs: this.hub.KAFKA_CONSUMPTION_MAX_WAIT_MS, + consumerErrorBackoffMs: this.hub.KAFKA_CONSUMPTION_ERROR_BACKOFF_MS, + fetchBatchSize: this.hub.INGESTION_BATCH_SIZE, + batchingTimeoutMs: this.hub.KAFKA_CONSUMPTION_BATCHING_TIMEOUT_MS, + topicCreationTimeoutMs: this.hub.KAFKA_TOPIC_CREATION_TIMEOUT_MS, + topicMetadataRefreshInterval: this.hub.KAFKA_TOPIC_METADATA_REFRESH_INTERVAL_MS, + eachBatch: async (messages, { heartbeat }) => { + status.info('🔁', `${this.name} - handling batch`, { + size: messages.length, + }) - this.produceAppMetric({ - team_id: result.invocation.team.id, - app_source_id: result.invocation.hogFunction.id, - metric_kind: result.error ? 'failure' : 'success', - metric_name: result.error ? 'failed' : 'succeeded', - count: 1, - }) + this.heartbeat = heartbeat - this.produceLogs(result) + histogramKafkaBatchSize.observe(messages.length) + histogramKafkaBatchSizeKb.observe(messages.reduce((acc, m) => (m.value?.length ?? 0) + acc, 0) / 1024) - // PostHog capture events - const capturedEvents = result.capturedPostHogEvents - delete result.capturedPostHogEvents + return await runInstrumentedFunction({ + statsKey: `cdpConsumer.handleEachBatch`, + sendTimeoutGuardToSentry: false, + func: async () => { + await this._handleKafkaBatch(messages) + }, + }) + }, + callEachBatchWhenEmpty: false, + }) - for (const event of capturedEvents ?? []) { - const team = await this.hub.teamManager.fetchTeam(event.team_id) - if (!team) { - continue - } - this.messagesToProduce.push({ - topic: KAFKA_EVENTS_PLUGIN_INGESTION, - value: convertToCaptureEvent(event, team), - key: `${team!.api_token}:${event.distinct_id}`, - }) - } + addSentryBreadcrumbsEventListeners(this.batchConsumer.consumer) - if (!result.finished) { - // If it isn't finished then we need to put it back on the queue - await this.queueInvocation(result.invocation) - } - }) - ) - }, + this.batchConsumer.consumer.on('disconnected', async (err) => { + // since we can't be guaranteed that the consumer will be stopped before some other code calls disconnect + // we need to listen to disconnect and make sure we're stopped + status.info('🔁', `${this.name} batch consumer disconnected, cleaning up`, { err }) + await this.stop() }) } + public async start(): Promise { + // NOTE: This is only for starting shared services + await Promise.all([ + this.hogFunctionManager.start(), + this.hub.CYCLOTRON_DATABASE_URL + ? cyclotron.initManager({ shards: [{ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }] }) + : Promise.resolve(), + ]) + + this.kafkaProducer = await createKafkaProducerWrapper(this.hub) + this.kafkaProducer.producer.connect() + } + + public async stop(): Promise { + status.info('🔁', `${this.name} - stopping`) + this.isStopping = true + + // Mark as stopping so that we don't actually process any more incoming messages, but still keep the process alive + status.info('🔁', `${this.name} - stopping batch consumer`) + await this.batchConsumer?.stop() + status.info('🔁', `${this.name} - stopping kafka producer`) + await this.kafkaProducer?.disconnect() + status.info('🔁', `${this.name} - stopping hog function manager and hog watcher`) + await Promise.all([this.hogFunctionManager.stop()]) + + status.info('👍', `${this.name} - stopped!`) + } + + public isHealthy() { + // TODO: Check either kafka consumer or cyclotron worker exists + // and that whatever exists is healthy + return this.batchConsumer?.isHealthy() + } +} + +/** + * This consumer handles incoming events from the main clickhouse topic + */ + +export class CdpProcessedEventsConsumer extends CdpConsumerBase { + protected name = 'CdpProcessedEventsConsumer' + protected topic = KAFKA_EVENTS_JSON + protected consumerGroupId = 'cdp-processed-events-consumer' + + public async processBatch(invocationGlobals: HogFunctionInvocationGlobals[]): Promise { + if (!invocationGlobals.length) { + return + } + + const invocationsToBeQueued = await this.runWithHeartbeat(() => + this.createHogFunctionInvocations(invocationGlobals) + ) + await this.queueInvocations(invocationsToBeQueued) + await this.produceQueuedMessages() + } + /** * Finds all matching hog functions for the given globals. * Filters them for their disabled state as well as masking configs * */ - protected async queueMatchingFunctions( + protected async createHogFunctionInvocations( invocationGlobals: HogFunctionInvocationGlobals[] ): Promise { return await runInstrumentedFunction({ @@ -332,7 +373,7 @@ abstract class CdpConsumerBase { const { matchingFunctions, nonMatchingFunctions } = this.hogExecutor.findMatchingFunctions(globals) possibleInvocations.push( - ...matchingFunctions.map((hogFunction) => this.createInvocation(globals, hogFunction)) + ...matchingFunctions.map((hogFunction) => createInvocation(globals, hogFunction)) ) nonMatchingFunctions.forEach((item) => @@ -382,11 +423,6 @@ abstract class CdpConsumerBase { }) }) - // We create the invocation here instead of in the function... - await this.runManyWithHeartbeat(notMaskedInvocations, (item) => - console.log('TODO: Create invocation here') - ) - return notMaskedInvocations // TODO: Option for routing to cyclotron instead of kafka @@ -403,275 +439,224 @@ abstract class CdpConsumerBase { }) } - public async start(): Promise { - status.info('🔁', `${this.name} - starting`, { - librdKafkaVersion: librdkafkaVersion, - kafkaCapabilities: features, - }) - - // NOTE: This is the only place where we need to use the shared server config - const globalConnectionConfig = createRdConnectionConfigFromEnvVars(this.hub) - const globalProducerConfig = createRdProducerConfigFromEnvVars(this.hub) - - await Promise.all([ - this.hogFunctionManager.start(), - this.hub.CYCLOTRON_DATABASE_URL - ? cyclotron.initManager({ shards: [{ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }] }) - : Promise.resolve(), - ]) - - this.kafkaProducer = new KafkaProducerWrapper( - await createKafkaProducer(globalConnectionConfig, globalProducerConfig) - ) - - this.kafkaProducer.producer.connect() - - this.batchConsumer = await startBatchConsumer({ - connectionConfig: createRdConnectionConfigFromEnvVars(this.hub), - groupId: this.consumerGroupId, - topic: this.topic, - autoCommit: true, - sessionTimeout: this.hub.KAFKA_CONSUMPTION_SESSION_TIMEOUT_MS, - maxPollIntervalMs: this.hub.KAFKA_CONSUMPTION_MAX_POLL_INTERVAL_MS, - // the largest size of a message that can be fetched by the consumer. - // the largest size our MSK cluster allows is 20MB - // we only use 9 or 10MB but there's no reason to limit this 🤷️ - consumerMaxBytes: this.hub.KAFKA_CONSUMPTION_MAX_BYTES, - consumerMaxBytesPerPartition: this.hub.KAFKA_CONSUMPTION_MAX_BYTES_PER_PARTITION, - // our messages are very big, so we don't want to buffer too many - // queuedMinMessages: this.hub.KAFKA_QUEUE_SIZE, - consumerMaxWaitMs: this.hub.KAFKA_CONSUMPTION_MAX_WAIT_MS, - consumerErrorBackoffMs: this.hub.KAFKA_CONSUMPTION_ERROR_BACKOFF_MS, - fetchBatchSize: this.hub.INGESTION_BATCH_SIZE, - batchingTimeoutMs: this.hub.KAFKA_CONSUMPTION_BATCHING_TIMEOUT_MS, - topicCreationTimeoutMs: this.hub.KAFKA_TOPIC_CREATION_TIMEOUT_MS, - topicMetadataRefreshInterval: this.hub.KAFKA_TOPIC_METADATA_REFRESH_INTERVAL_MS, - eachBatch: async (messages, { heartbeat }) => { - return await this.handleEachBatch(messages, heartbeat) - }, - callEachBatchWhenEmpty: false, - }) - - addSentryBreadcrumbsEventListeners(this.batchConsumer.consumer) - - this.batchConsumer.consumer.on('disconnected', async (err) => { - // since we can't be guaranteed that the consumer will be stopped before some other code calls disconnect - // we need to listen to disconnect and make sure we're stopped - status.info('🔁', `${this.name} batch consumer disconnected, cleaning up`, { err }) - await this.stop() - }) - } - - public async stop(): Promise { - status.info('🔁', `${this.name} - stopping`) - this.isStopping = true - - // Mark as stopping so that we don't actually process any more incoming messages, but still keep the process alive - status.info('🔁', `${this.name} - stopping batch consumer`) - await this.batchConsumer?.stop() - status.info('🔁', `${this.name} - stopping kafka producer`) - await this.kafkaProducer?.disconnect() - status.info('🔁', `${this.name} - stopping hog function manager and hog watcher`) - await Promise.all([this.hogFunctionManager.stop()]) - - status.info('👍', `${this.name} - stopped!`) - } - - public isHealthy() { - return this.batchConsumer?.isHealthy() - } -} - -/** - * This consumer handles incoming events from the main clickhouse topic - */ - -export class CdpProcessedEventsConsumer extends CdpConsumerBase { - protected name = 'CdpProcessedEventsConsumer' - protected topic = KAFKA_EVENTS_JSON - protected consumerGroupId = 'cdp-processed-events-consumer' - - public async _handleEachBatch(messages: Message[]): Promise { + // This consumer always parses from kafka + public async _handleKafkaBatch(messages: Message[]): Promise { const invocationGlobals = await this.runWithHeartbeat(() => runInstrumentedFunction({ statsKey: `cdpConsumer.handleEachBatch.parseKafkaMessages`, - func: async () => await this.parseMessages(messages), - }) - ) - - if (!invocationGlobals.length) { - return - } - - const invocationResults = await this.runWithHeartbeat(() => this.queueMatchingFunctions(invocationGlobals)) - - await this.processInvocationResults(invocationResults) - } - - private async parseMessages(messages: Message[]): Promise { - const events: HogFunctionInvocationGlobals[] = [] - await Promise.all( - messages.map(async (message) => { - try { - const clickHouseEvent = JSON.parse(message.value!.toString()) as RawClickHouseEvent - - if (!this.hogFunctionManager.teamHasHogFunctions(clickHouseEvent.team_id)) { - // No need to continue if the team doesn't have any functions - return - } - - const team = await this.hub.teamManager.fetchTeam(clickHouseEvent.team_id) - if (!team) { - return - } - events.push( - convertToHogFunctionInvocationGlobals( - clickHouseEvent, - team, - this.hub.SITE_URL ?? 'http://localhost:8000' - ) + func: async () => { + const events: HogFunctionInvocationGlobals[] = [] + await Promise.all( + messages.map(async (message) => { + try { + const clickHouseEvent = JSON.parse(message.value!.toString()) as RawClickHouseEvent + + if (!this.hogFunctionManager.teamHasHogFunctions(clickHouseEvent.team_id)) { + // No need to continue if the team doesn't have any functions + return + } + + const team = await this.hub.teamManager.fetchTeam(clickHouseEvent.team_id) + if (!team) { + return + } + events.push( + convertToHogFunctionInvocationGlobals( + clickHouseEvent, + team, + this.hub.SITE_URL ?? 'http://localhost:8000' + ) + ) + } catch (e) { + status.error('Error parsing message', e) + } + }) ) - } catch (e) { - status.error('Error parsing message', e) - } + + return events + }, }) ) - return events + await this.processBatch(invocationGlobals) } } /** - * This consumer handles callbacks from async functions. + * This consumer handles actually invoking hog in a loop */ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { protected name = 'CdpFunctionCallbackConsumer' protected topic = KAFKA_CDP_FUNCTION_CALLBACKS protected consumerGroupId = 'cdp-function-callback-consumer' - public async _handleEachBatch(messages: Message[]): Promise { - const events = await this.runWithHeartbeat(() => - runInstrumentedFunction({ - statsKey: `cdpConsumer.handleEachBatch.parseKafkaMessages`, - func: () => Promise.resolve(this.parseMessages(messages)), - }) - ) - - if (!events.length) { + public async processBatch(invocations: HogFunctionInvocation[]): Promise { + if (!invocations.length) { return } - - // TODO: This needs to handle the new kind of state schedulign and the old kind from rusty hook - - const invocationResults = await this.runWithHeartbeat(() => this.executeAsyncResponses(events)) - + const invocationResults = await this.runWithHeartbeat(() => this.processInvocations(invocations)) await this.processInvocationResults(invocationResults) } - protected async executeAsyncResponses( - asyncResponses: HogFunctionInvocationAsyncResponse[] - ): Promise { + protected async processInvocations(invocations: HogFunctionInvocation[]): Promise { + // These are either new invocations or responses + // The key thing we need to consider is taking the response, adding it to the invocation state and then continuing return await runInstrumentedFunction({ - statsKey: `cdpConsumer.handleEachBatch.executeAsyncResponses`, + statsKey: `cdpConsumer.handleEachBatch.executeInvocations`, func: async () => { - asyncResponses.forEach((x) => { - counterAsyncFunctionResponse.inc({ - outcome: x.asyncFunctionResponse.error ? 'failed' : 'succeeded', - }) - }) - - const invocationsWithResponses: [HogFunctionInvocation, HogFunctionAsyncFunctionResponse][] = [] - - // Deserialize the compressed data - await Promise.all( - asyncResponses.map(async (item) => { - try { - const invocation = await unGzipObject(item.state) - invocationsWithResponses.push([invocation, item.asyncFunctionResponse]) - } catch (e) { - status.error('Error unzipping message', e, item.state) - captureException(e, { - extra: { hogFunctionId: item.hogFunctionId, teamId: item.teamId }, - }) - } - }) - ) - - const results = await this.runManyWithHeartbeat(invocationsWithResponses, (item) => - this.hogExecutor.executeAsyncResponse(...item) - ) + // TODO: Handle if the invocation step is not "hog" so we should do fetch instead... + const results = await this.runManyWithHeartbeat(invocations, (item) => this.hogExecutor.execute(item)) await this.hogWatcher.observeResults(results) return results }, }) } - private parseMessages(messages: Message[]): HogFunctionInvocationAsyncResponse[] { - const events: HogFunctionInvocationAsyncResponse[] = [] - messages.map((message) => { - try { - const event = JSON.parse(message.value!.toString()) - events.push(event as HogFunctionInvocationAsyncResponse) - } catch (e) { - status.error('Error parsing message', e) - } - }) + protected async processInvocationResults(results: HogFunctionInvocationResult[]): Promise { + await runInstrumentedFunction({ + statsKey: `cdpConsumer.handleEachBatch.produceResults`, + func: async () => { + await Promise.all( + results.map(async (result) => { + // Tricky: We want to pull all the logs out as we don't want them to be passed around to any subsequent functions + this.produceAppMetric({ + team_id: result.invocation.team_id, + app_source_id: result.invocation.hogFunction.id, + metric_kind: result.error ? 'failure' : 'success', + metric_name: result.error ? 'failed' : 'succeeded', + count: 1, + }) - return events - } -} + this.produceLogs(result) -// TODO: Split out non-Kafka specific parts of CdpConsumerBase so that it can be used by the -// Cyclotron worker below. Or maybe we can just wait, and rip the Kafka bits out once Cyclotron is -// shipped (and rename it something other than consomer, probably). For now, this is an easy way to -// use existing code and get an end-to-end demo shipped. -export class CdpCyclotronWorker extends CdpConsumerBase { - protected name = 'CdpCyclotronWorker' - protected topic = 'UNUSED-CdpCyclotronWorker' - protected consumerGroupId = 'UNUSED-CdpCyclotronWorker' - private runningWorker: Promise | undefined - private isUnhealthy = false - - public async _handleEachBatch(_: Message[]): Promise { - // Not called, we override `start` below to use Cyclotron instead. - } + // PostHog capture events + const capturedEvents = result.capturedPostHogEvents + delete result.capturedPostHogEvents - private async innerStart() { - try { - const limit = 100 // TODO: Make configurable. - while (!this.isStopping) { - const jobs = await cyclotron.dequeueJobsWithVmState('hog', limit) - for (const job of jobs) { - // TODO: Reassemble a HogFunctionInvocationAsyncResponse (or whatever proper type) - // from the fields on the job, and then execute the next Hog step. - console.log(job.id) - } - } - } catch (err) { - this.isUnhealthy = true - console.error('Error in Cyclotron worker', err) - throw err - } - } + for (const event of capturedEvents ?? []) { + const team = await this.hub.teamManager.fetchTeam(event.team_id) + if (!team) { + continue + } + this.messagesToProduce.push({ + topic: KAFKA_EVENTS_PLUGIN_INGESTION, + value: convertToCaptureEvent(event, team), + key: `${team!.api_token}:${event.distinct_id}`, + }) + } - public async start() { - await cyclotron.initManager({ shards: [{ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }] }) - await cyclotron.initWorker({ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }) + if (!result.finished) { + // If it isn't finished then we need to put it back on the queue + await this.queueInvocation(result.invocation) + } + }) + ) + }, + }) + } - // Consumer `start` expects an async task is started, and not that `start` itself blocks - // indefinitely. - this.runningWorker = this.innerStart() + public async _handleKafkaBatch(messages: Message[]): Promise { + const events = await this.runWithHeartbeat(() => + runInstrumentedFunction({ + statsKey: `cdpConsumer.handleEachBatch.parseKafkaMessages`, + func: async () => { + // TRICKY: In the future we won't use kafka. For now though we need to parse messages as Cyclotron style jobs + // or hoghooks async callbacks + + const invocations: HogFunctionInvocation[] = [] + + // Parse the base message value + const entries: (HogFunctionInvocationAsyncResponse | HogFunctionInvocationSerialized)[] = messages + .map((message) => { + try { + return JSON.parse(message.value!.toString()) + } catch (e) { + status.error('Error parsing message', e) + } - return Promise.resolve() - } + return undefined + }) + .filter(Boolean) + + // Deserialize the compressed data + await Promise.all( + entries.map(async (item) => { + // If it looks like a + try { + const invocation = await unGzipObject(item.state) + if ('asyncFunctionResponse' in item) { + // This means it is a callback from hoghooks so we need to add the response to the invocation + invocation.queue = 'hog' + invocation.queueParameters = item.asyncFunctionResponse + } + invocations.push(invocation) + } catch (e) { + status.error('Error unzipping message', e, item.state) + captureException(e) + } + }) + ) - public async stop() { - await super.stop() - await this.runningWorker - } + return invocations + }, + }) + ) - public isHealthy() { - return this.isUnhealthy + await this.processBatch(events) } } + +// // TODO: Split out non-Kafka specific parts of CdpConsumerBase so that it can be used by the +// // Cyclotron worker below. Or maybe we can just wait, and rip the Kafka bits out once Cyclotron is +// // shipped (and rename it something other than consomer, probably). For now, this is an easy way to +// // use existing code and get an end-to-end demo shipped. +// export class CdpCyclotronWorker extends CdpConsumerBase { +// protected name = 'CdpCyclotronWorker' +// protected topic = 'UNUSED-CdpCyclotronWorker' +// protected consumerGroupId = 'UNUSED-CdpCyclotronWorker' +// private runningWorker: Promise | undefined +// private isUnhealthy = false + +// public async _handleEachBatch(_: Message[]): Promise { +// // Not called, we override `start` below to use Cyclotron instead. +// } + +// private async innerStart() { +// try { +// const limit = 100 // TODO: Make configurable. +// while (!this.isStopping) { +// const jobs = await cyclotron.dequeueJobsWithVmState('hog', limit) +// for (const job of jobs) { +// // TODO: Reassemble a HogFunctionInvocationAsyncResponse (or whatever proper type) +// // from the fields on the job, and then execute the next Hog step. +// console.log(job.id) +// } +// } +// } catch (err) { +// this.isUnhealthy = true +// console.error('Error in Cyclotron worker', err) +// throw err +// } +// } + +// public async start() { +// await cyclotron.initManager({ shards: [{ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }] }) +// await cyclotron.initWorker({ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }) + +// // Consumer `start` expects an async task is started, and not that `start` itself blocks +// // indefinitely. +// this.runningWorker = this.innerStart() + +// return Promise.resolve() +// } + +// public async stop() { +// await super.stop() +// await this.runningWorker +// } + +// public isHealthy() { +// return this.isUnhealthy +// } +// } diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index 3307a9136c7c1..a5723736331bb 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -3,11 +3,9 @@ import { DateTime } from 'luxon' import { Histogram } from 'prom-client' import { status } from '../utils/status' -import { UUIDT } from '../utils/utils' import { HogFunctionManager } from './hog-function-manager' import { HogFunctionInvocation, - HogFunctionInvocationAsyncResponse, HogFunctionInvocationGlobals, HogFunctionInvocationGlobalsWithInputs, HogFunctionInvocationResult, @@ -123,115 +121,11 @@ export class HogExecutor { } } - /** - * Intended to be invoked as a starting point from an event - */ - executeFunction( - event: HogFunctionInvocationGlobals, - functionOrId: HogFunctionType | HogFunctionType['id'] - ): HogFunctionInvocationResult | undefined { - const hogFunction = - typeof functionOrId === 'string' - ? this.hogFunctionManager.getTeamHogFunction(event.project.id, functionOrId) - : functionOrId - - if (!hogFunction) { - return - } - - // Add the source of the trigger to the globals - const modifiedGlobals: HogFunctionInvocationGlobals = { - ...event, - source: { - name: hogFunction.name ?? `Hog function: ${hogFunction.id}`, - url: `${event.project.url}/pipeline/destinations/hog-${hogFunction.id}/configuration/`, - }, - } - - return this.execute(hogFunction, { - id: new UUIDT().toString(), - globals: modifiedGlobals, - teamId: hogFunction.team_id, - hogFunctionId: hogFunction.id, - timings: [], - }) - } - - /** - * Intended to be invoked as a continuation from an async function - */ - executeAsyncResponse( - invocation: HogFunctionInvocation, - asyncFunctionResponse: HogFunctionInvocationAsyncResponse['asyncFunctionResponse'] - ): HogFunctionInvocationResult { - if (!invocation.hogFunctionId) { - throw new Error('No hog function id provided') - } - - const { logs = [], response = null, error: asyncError, timings = [] } = asyncFunctionResponse - - if (response?.status && response.status >= 400) { - // Generic warn log for bad status codes - logs.push({ - level: 'warn', - timestamp: DateTime.now(), - message: `Fetch returned bad status: ${response.status}`, - }) - } - - const errorRes = (error = 'Something went wrong'): HogFunctionInvocationResult => ({ - invocation, - finished: false, - error, - logs: [ - ...logs, - { - level: 'error', - timestamp: DateTime.now(), - message: error, - }, - ], - }) - - const hogFunction = this.hogFunctionManager.getTeamHogFunction( - invocation.globals.project.id, - invocation.hogFunctionId - ) - - if (!hogFunction || !invocation.vmState || asyncError) { - return errorRes( - !hogFunction - ? `Hog Function with ID ${invocation.hogFunctionId} not found` - : asyncError - ? asyncError - : 'No VM state provided for async response' - ) - } - - if (typeof response?.body === 'string') { - try { - response.body = JSON.parse(response.body) - } catch (e) { - // pass - if it isn't json we just pass it on - } - } - - // Add the response to the stack to continue execution - invocation.vmState.stack.push(response) - invocation.timings.push(...timings) - - const res = this.execute(hogFunction, invocation) - - // Add any timings and logs from the async function - res.logs = [...(logs ?? []), ...res.logs] - - return res - } - - execute(hogFunction: HogFunctionType, invocation: HogFunctionInvocation): HogFunctionInvocationResult { + execute(invocation: HogFunctionInvocation): HogFunctionInvocationResult { const loggingContext = { - hogFunctionId: hogFunction.id, - hogFunctionName: hogFunction.name, + invocationId: invocation.id, + hogFunctionId: invocation.hogFunction.id, + hogFunctionName: invocation.hogFunction.name, hogFunctionUrl: invocation.globals.source?.url, } @@ -239,7 +133,6 @@ export class HogExecutor { const result: HogFunctionInvocationResult = { invocation, - asyncFunctionRequest: undefined, finished: false, capturedPostHogEvents: [], logs: [], @@ -252,12 +145,53 @@ export class HogExecutor { }) try { + // If the queueParameter is set then we have an expected format that we want to parse and add to the stack + if (invocation.queueParameters) { + const { logs = [], response = null, error, timings = [] } = invocation.queueParameters + + // Special handling for fetch + // TODO: Would be good to have a dedicated value in the fetch response for the status code + if (response?.status && response.status >= 400) { + // Generic warn log for bad status codes + logs.push({ + level: 'warn', + timestamp: DateTime.now(), + message: `Fetch returned bad status: ${response.status}`, + }) + } + + if (!invocation.vmState) { + throw new Error("VM state wasn't provided for queue parameters") + } + + if (error) { + throw new Error(error) + } + + if (typeof response?.body === 'string') { + try { + response.body = JSON.parse(response.body) + } catch (e) { + // pass - if it isn't json we just pass it on + } + } + + // Add the response to the stack to continue execution + invocation.vmState!.stack.push(response) + invocation.timings.push(...timings) + result.logs = [...logs, ...result.logs] + + // Reset the queue parameters to be sure + invocation.queue = 'hog' + invocation.queueParameters = undefined + } + const start = performance.now() let globals: HogFunctionInvocationGlobalsWithInputs let execRes: ExecResult | undefined = undefined try { - globals = this.buildHogFunctionGlobals(hogFunction, invocation) + globals = this.buildHogFunctionGlobals(invocation) } catch (e) { result.logs.push({ level: 'error', @@ -268,11 +202,11 @@ export class HogExecutor { throw e } - const sensitiveValues = this.getSensitiveValues(hogFunction, globals.inputs) + const sensitiveValues = this.getSensitiveValues(invocation.hogFunction, globals.inputs) try { let hogLogs = 0 - execRes = exec(invocation.vmState ?? hogFunction.bytecode, { + execRes = exec(invocation.vmState ?? invocation.hogFunction.bytecode, { globals, timeout: DEFAULT_TIMEOUT_MS, // TODO: Swap this to milliseconds when the package is updated maxAsyncSteps: MAX_ASYNC_STEPS, // NOTE: This will likely be configurable in the future @@ -323,7 +257,7 @@ export class HogExecutor { } result.capturedPostHogEvents!.push({ - team_id: invocation.teamId, + team_id: invocation.team_id, timestamp: DateTime.utc().toISO(), distinct_id: event.distinct_id || invocation.globals.event.distinct_id, event: event.event, @@ -369,10 +303,37 @@ export class HogExecutor { }) if (execRes.asyncFunctionName) { - result.invocation.vmState = execRes.state - result.asyncFunctionRequest = { - name: execRes.asyncFunctionName, - args: args, + switch (execRes.asyncFunctionName) { + case 'fetch': + // Sanitize the args + const [url, fetchOptions] = execRes.asyncFunctionArgs as [ + string | undefined, + Record | undefined + ] + + if (typeof url !== 'string') { + throw new Error('fetch: Invalid URL') + } + + const method = fetchOptions?.method || 'POST' + const headers = fetchOptions?.headers || { + 'Content-Type': 'application/json', + } + let body = fetchOptions?.body + // Modify the body to ensure it is a string (we allow Hog to send an object to keep things simple) + body = body ? (typeof body === 'string' ? body : JSON.stringify(body)) : body + + result.invocation.queue = 'fetch' + result.invocation.queueParameters = { + url, + method, + headers, + body, + } + + break + default: + throw new Error(`Unknown async function '${execRes.asyncFunctionName}'`) } } else { result.logs.push({ @@ -397,19 +358,20 @@ export class HogExecutor { } } catch (err) { result.error = err.message - status.error('🦔', `[HogExecutor] Error executing function ${hogFunction.id} - ${hogFunction.name}`, err) + status.error( + '🦔', + `[HogExecutor] Error executing function ${invocation.hogFunction.id} - ${invocation.hogFunction.name}`, + err + ) } return result } - buildHogFunctionGlobals( - hogFunction: HogFunctionType, - invocation: HogFunctionInvocation - ): HogFunctionInvocationGlobalsWithInputs { + buildHogFunctionGlobals(invocation: HogFunctionInvocation): HogFunctionInvocationGlobalsWithInputs { const builtInputs: Record = {} - Object.entries(hogFunction.inputs ?? {}).forEach(([key, item]) => { + Object.entries(invocation.hogFunction.inputs ?? {}).forEach(([key, item]) => { builtInputs[key] = item.value if (item.bytecode) { diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index 9583507d64e51..0e55098f19159 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -159,7 +159,7 @@ export type HogFunctionInvocation = { team_id: Team['id'] hogFunction: HogFunctionType queue: 'hog' | 'fetch' - queueParameters?: any + queueParameters?: Record // The current vmstate (set if the invocation is paused) vmState?: VMState timings: HogFunctionTiming[] From a639c9d20b075f9446f6d89bcaa0711512210118 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 29 Aug 2024 15:27:33 +0200 Subject: [PATCH 03/75] Fixes all round --- plugin-server/src/cdp/cdp-consumers.ts | 4 +- plugin-server/src/cdp/hog-executor.ts | 8 +- plugin-server/src/cdp/hog-watcher.ts | 4 +- plugin-server/src/cdp/types.ts | 2 +- plugin-server/src/cdp/utils.ts | 2 +- plugin-server/tests/cdp/fixtures.ts | 64 ++++- plugin-server/tests/cdp/hog-executor.test.ts | 241 ++++++++---------- .../tests/cdp/hog-function-manager.test.ts | 2 +- plugin-server/tests/cdp/hog-masker.test.ts | 86 +++---- plugin-server/tests/cdp/hog-watcher.test.ts | 4 +- plugin-server/tests/cdp/utils.test.ts | 8 +- 11 files changed, 211 insertions(+), 214 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index de3a4e75d1d5f..777f3145326ff 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -84,7 +84,7 @@ function createInvocation(globals: HogFunctionInvocationGlobals, hogFunction: Ho return { id: new UUIDT().toString(), globals: modifiedGlobals, - team_id: hogFunction.team_id, + teamId: hogFunction.team_id, hogFunction, queue: 'hog', timings: [], @@ -521,7 +521,7 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { results.map(async (result) => { // Tricky: We want to pull all the logs out as we don't want them to be passed around to any subsequent functions this.produceAppMetric({ - team_id: result.invocation.team_id, + team_id: result.invocation.teamId, app_source_id: result.invocation.hogFunction.id, metric_kind: result.error ? 'failure' : 'success', metric_name: result.error ? 'failed' : 'succeeded', diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index a5723736331bb..d20ffac34b5da 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -257,7 +257,7 @@ export class HogExecutor { } result.capturedPostHogEvents!.push({ - team_id: invocation.team_id, + team_id: invocation.teamId, timestamp: DateTime.utc().toISO(), distinct_id: event.distinct_id || invocation.globals.event.distinct_id, event: event.event, @@ -283,6 +283,7 @@ export class HogExecutor { hogExecutionDuration.observe(duration) result.finished = execRes.finished + result.invocation.vmState = execRes.state invocation.timings.push({ kind: 'hog', duration_ms: duration, @@ -306,10 +307,7 @@ export class HogExecutor { switch (execRes.asyncFunctionName) { case 'fetch': // Sanitize the args - const [url, fetchOptions] = execRes.asyncFunctionArgs as [ - string | undefined, - Record | undefined - ] + const [url, fetchOptions] = args as [string | undefined, Record | undefined] if (typeof url !== 'string') { throw new Error('fetch: Invalid URL') diff --git a/plugin-server/src/cdp/hog-watcher.ts b/plugin-server/src/cdp/hog-watcher.ts index 0e80c1a3cac41..38a2a3901b776 100644 --- a/plugin-server/src/cdp/hog-watcher.ts +++ b/plugin-server/src/cdp/hog-watcher.ts @@ -124,7 +124,7 @@ export class HogWatcher { const costs: Record = {} results.forEach((result) => { - let cost = (costs[result.invocation.hogFunctionId] = costs[result.invocation.hogFunctionId] || 0) + let cost = (costs[result.invocation.hogFunction.id] = costs[result.invocation.hogFunction.id] || 0) if (result.finished) { // If it is finished we can calculate the score based off of the timings @@ -142,7 +142,7 @@ export class HogWatcher { cost += this.hub.CDP_WATCHER_COST_ERROR } - costs[result.invocation.hogFunctionId] = cost + costs[result.invocation.hogFunction.id] = cost }) const res = await this.redis.usePipeline({ name: 'checkRateLimits' }, (pipeline) => { diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index 0e55098f19159..99b0ca6d5d607 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -156,7 +156,7 @@ export interface HogFunctionTiming { export type HogFunctionInvocation = { id: string globals: HogFunctionInvocationGlobals - team_id: Team['id'] + teamId: Team['id'] hogFunction: HogFunctionType queue: 'hog' | 'fetch' queueParameters?: Record diff --git a/plugin-server/src/cdp/utils.ts b/plugin-server/src/cdp/utils.ts index cc49b63fc1eb6..f43c39739a66d 100644 --- a/plugin-server/src/cdp/utils.ts +++ b/plugin-server/src/cdp/utils.ts @@ -190,7 +190,7 @@ export const prepareLogEntriesForClickhouse = ( ...logEntry, team_id: result.invocation.teamId, log_source: 'hog_function', - log_source_id: result.invocation.hogFunctionId, + log_source_id: result.invocation.hogFunction.id, instance_id: result.invocation.id, timestamp: castTimestampOrNow(logEntry.timestamp, TimestampFormat.ClickHouse), } diff --git a/plugin-server/tests/cdp/fixtures.ts b/plugin-server/tests/cdp/fixtures.ts index b70af8efe3c39..ff962974ce9b2 100644 --- a/plugin-server/tests/cdp/fixtures.ts +++ b/plugin-server/tests/cdp/fixtures.ts @@ -1,22 +1,25 @@ import { randomUUID } from 'crypto' import { Message } from 'node-rdkafka' -import { HogFunctionInvocationGlobals, HogFunctionType, IntegrationType } from '../../src/cdp/types' +import { + HogFunctionInvocation, + HogFunctionInvocationGlobals, + HogFunctionType, + IntegrationType, +} from '../../src/cdp/types' import { ClickHouseTimestamp, RawClickHouseEvent, Team } from '../../src/types' import { PostgresRouter } from '../../src/utils/db/postgres' +import { UUIDT } from '../../src/utils/utils' import { insertRow } from '../helpers/sql' export const createHogFunction = (hogFunction: Partial) => { const item: HogFunctionType = { id: randomUUID(), + name: 'Hog Function', team_id: 1, - created_at: new Date().toISOString(), - updated_at: new Date().toISOString(), - created_by_id: 1001, enabled: true, - deleted: false, - description: '', hog: '', + bytecode: [], ...hogFunction, } @@ -68,14 +71,19 @@ export const insertHogFunction = async ( team_id: Team['id'], hogFunction: Partial = {} ): Promise => { - const res = await insertRow( - postgres, - 'posthog_hogfunction', - createHogFunction({ + // This is only used for testing so we need to override some values + + const res = await insertRow(postgres, 'posthog_hogfunction', { + ...createHogFunction({ ...hogFunction, team_id: team_id, - }) - ) + }), + description: '', + created_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + created_by_id: 1001, + deleted: false, + }) return res } @@ -99,6 +107,13 @@ export const createHogExecutionGlobals = ( data: Partial = {} ): HogFunctionInvocationGlobals => { return { + groups: {}, + person: { + uuid: 'person-uuid', + name: 'person', + url: 'http://localhost:8000/persons/1', + properties: {}, + }, ...data, project: { id: 1, @@ -119,3 +134,28 @@ export const createHogExecutionGlobals = ( }, } } + +export const createInvocation = ( + _hogFunction: Partial = {}, + _globals: Partial = {} +): HogFunctionInvocation => { + const hogFunction = createHogFunction(_hogFunction) + // Add the source of the trigger to the globals + let globals = createHogExecutionGlobals(_globals) + globals = { + ...globals, + source: { + name: hogFunction.name ?? `Hog function: ${hogFunction.id}`, + url: `${globals.project.url}/pipeline/destinations/hog-${hogFunction.id}/configuration/`, + }, + } + + return { + id: new UUIDT().toString(), + globals, + teamId: hogFunction.team_id, + hogFunction, + queue: 'hog', + timings: [], + } +} diff --git a/plugin-server/tests/cdp/hog-executor.test.ts b/plugin-server/tests/cdp/hog-executor.test.ts index ae5203f6415b0..3ce0a5a6c2e77 100644 --- a/plugin-server/tests/cdp/hog-executor.test.ts +++ b/plugin-server/tests/cdp/hog-executor.test.ts @@ -2,14 +2,14 @@ import { DateTime } from 'luxon' import { HogExecutor } from '../../src/cdp/hog-executor' import { HogFunctionManager } from '../../src/cdp/hog-function-manager' -import { - HogFunctionAsyncFunctionResponse, - HogFunctionInvocationResult, - HogFunctionType, - LogEntry, -} from '../../src/cdp/types' +import { HogFunctionAsyncFunctionResponse, HogFunctionType } from '../../src/cdp/types' import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' -import { createHogExecutionGlobals, createHogFunction, insertHogFunction as _insertHogFunction } from './fixtures' +import { + createHogExecutionGlobals, + createHogFunction, + createInvocation, + insertHogFunction as _insertHogFunction, +} from './fixtures' const createAsyncFunctionResponse = (response?: Record): HogFunctionAsyncFunctionResponse => { return { @@ -57,28 +57,35 @@ describe('Hog Executor', () => { mockFunctionManager.getTeamHogFunction.mockReturnValue(hogFunction) }) - it('can execute messages', () => { - const globals = createHogExecutionGlobals() - const results = executor - .findMatchingFunctions(createHogExecutionGlobals()) - .matchingFunctions.map((x) => executor.executeFunction(globals, x) as HogFunctionInvocationResult) - expect(results).toHaveLength(1) - expect(results[0]).toMatchObject({ + it('can execute an invocation', () => { + const invocation = createInvocation(hogFunction) + const result = executor.execute(invocation) + expect(result).toEqual({ + capturedPostHogEvents: [], invocation: { id: expect.any(String), - hogFunctionId: hogFunction.id, + teamId: 1, + globals: invocation.globals, + hogFunction: invocation.hogFunction, + queue: 'fetch', + queueParameters: expect.any(Object), + timings: [ + { + kind: 'hog', + duration_ms: 0, + }, + ], + vmState: expect.any(Object), }, finished: false, - asyncFunctionRequest: {}, + logs: expect.any(Array), }) }) it('collects logs from the function', () => { - const globals = createHogExecutionGlobals() - const results = executor - .findMatchingFunctions(createHogExecutionGlobals()) - .matchingFunctions.map((x) => executor.executeFunction(globals, x) as HogFunctionInvocationResult) - expect(results[0].logs).toMatchObject([ + const invocation = createInvocation(hogFunction) + const result = executor.execute(invocation) + expect(result.logs).toMatchObject([ { timestamp: expect.any(DateTime), level: 'debug', @@ -87,7 +94,7 @@ describe('Hog Executor', () => { { timestamp: expect.any(DateTime), level: 'debug', - message: "Suspending function due to async function call 'fetch'. Payload: 1299 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 1740 bytes", }, ]) }) @@ -97,10 +104,9 @@ describe('Hog Executor', () => { ...HOG_EXAMPLES.input_printer, ...HOG_INPUTS_EXAMPLES.secret_inputs, }) + const invocation = createInvocation(fn) + const result = executor.execute(invocation) - mockFunctionManager.getTeamHogFunctions.mockReturnValue([fn]) - - const result = executor.executeFunction(createHogExecutionGlobals(), fn) as HogFunctionInvocationResult expect(result.logs.map((x) => x.message)).toMatchInlineSnapshot(` Array [ "Executing function", @@ -115,18 +121,16 @@ describe('Hog Executor', () => { }) it('queues up an async function call', () => { - const globals = createHogExecutionGlobals() - const results = executor - .findMatchingFunctions(createHogExecutionGlobals()) - .matchingFunctions.map((x) => executor.executeFunction(globals, x) as HogFunctionInvocationResult) - expect(results[0]).toMatchObject({ - invocation: { - id: results[0].invocation.id, - teamId: 1, - hogFunctionId: hogFunction.id, - vmState: expect.any(Object), - globals: { - project: { id: 1, name: 'test', url: 'http://localhost:8000/projects/1' }, + const invocation = createInvocation(hogFunction) + const result = executor.execute(invocation) + + expect(result.invocation).toMatchObject({ + queue: 'fetch', + queueParameters: { + url: 'https://example.com/posthog-webhook', + method: 'POST', + headers: { version: 'v=1.2.3' }, + body: JSON.stringify({ event: { uuid: 'uuid', name: 'test', @@ -135,94 +139,63 @@ describe('Hog Executor', () => { properties: { $lib_version: '1.2.3' }, timestamp: '2024-06-07T12:00:00.000Z', }, - source: { - name: 'Test hog function', - url: `http://localhost:8000/projects/1/pipeline/destinations/hog-${hogFunction.id}/configuration/`, - }, - }, - timings: [ - { - kind: 'hog', - duration_ms: 0, - }, - ], - }, - - asyncFunctionRequest: { - name: 'fetch', - args: [ - 'https://example.com/posthog-webhook', - { - headers: { version: 'v=1.2.3' }, - body: { - event: { - uuid: 'uuid', - name: 'test', - distinct_id: 'distinct_id', - url: 'http://localhost:8000/events/1', - properties: { $lib_version: '1.2.3' }, - timestamp: '2024-06-07T12:00:00.000Z', - }, - groups: null, - nested: { foo: 'http://localhost:8000/events/1' }, - person: null, - event_url: 'http://localhost:8000/events/1-test', - }, - method: 'POST', + groups: {}, + nested: { foo: 'http://localhost:8000/events/1' }, + person: { + uuid: 'person-uuid', + name: 'person', + url: 'http://localhost:8000/persons/1', + properties: {}, }, - ], + event_url: 'http://localhost:8000/events/1-test', + }), }, }) }) it('executes the full function in a loop', () => { - const logs: LogEntry[] = [] - const globals = createHogExecutionGlobals() - const results = executor - .findMatchingFunctions(createHogExecutionGlobals()) - .matchingFunctions.map((x) => executor.executeFunction(globals, x) as HogFunctionInvocationResult) - const splicedLogs = results[0].logs.splice(0, 100) - logs.push(...splicedLogs) - - const asyncExecResult = executor.executeAsyncResponse(results[0].invocation, createAsyncFunctionResponse()) - - logs.push(...asyncExecResult.logs) - expect(asyncExecResult.error).toBeUndefined() - expect(asyncExecResult.finished).toBe(true) + const result = executor.execute(createInvocation(hogFunction)) + const logs = result.logs.splice(0, 100) + + expect(result.finished).toBe(false) + expect(result.invocation.queue).toBe('fetch') + expect(result.invocation.vmState).toBeDefined() + + // Simulate what the callback does + result.invocation.queue = 'hog' + result.invocation.queueParameters = createAsyncFunctionResponse() + + const secondResult = executor.execute(result.invocation) + logs.push(...secondResult.logs) + + expect(secondResult.finished).toBe(true) + expect(secondResult.error).toBeUndefined() expect(logs.map((log) => log.message)).toEqual([ 'Executing function', - "Suspending function due to async function call 'fetch'. Payload: 1299 bytes", + "Suspending function due to async function call 'fetch'. Payload: 1740 bytes", 'Resuming function', 'Fetch response:, {"status":200,"body":"success"}', - 'Function completed in 100ms. Sync: 0ms. Mem: 589 bytes. Ops: 22.', + 'Function completed in 100ms. Sync: 0ms. Mem: 722 bytes. Ops: 22.', ]) }) it('parses the responses body if a string', () => { - const logs: LogEntry[] = [] - const globals = createHogExecutionGlobals() - const results = executor - .findMatchingFunctions(createHogExecutionGlobals()) - .matchingFunctions.map((x) => executor.executeFunction(globals, x) as HogFunctionInvocationResult) - const splicedLogs = results[0].logs.splice(0, 100) - logs.push(...splicedLogs) - - const asyncExecResult = executor.executeAsyncResponse( - results[0].invocation, - createAsyncFunctionResponse({ - body: JSON.stringify({ foo: 'bar' }), - }) - ) + const result = executor.execute(createInvocation(hogFunction)) + const logs = result.logs.splice(0, 100) + result.invocation.queue = 'hog' + result.invocation.queueParameters = createAsyncFunctionResponse({ + body: JSON.stringify({ foo: 'bar' }), + }) + + const secondResult = executor.execute(result.invocation) + logs.push(...secondResult.logs) - logs.push(...asyncExecResult.logs) - expect(asyncExecResult.error).toBeUndefined() - expect(asyncExecResult.finished).toBe(true) expect(logs.map((log) => log.message)).toEqual([ 'Executing function', - "Suspending function due to async function call 'fetch'. Payload: 1299 bytes", + "Suspending function due to async function call 'fetch'. Payload: 1740 bytes", 'Resuming function', - 'Fetch response:, {"status":200,"body":{"foo":"bar"}}', // The body is parsed - 'Function completed in 100ms. Sync: 0ms. Mem: 589 bytes. Ops: 22.', + 'Fetch response:, {"status":200,"body":{"foo":"bar"}}', + 'Function completed in 100ms. Sync: 0ms. Mem: 722 bytes. Ops: 22.', ]) }) }) @@ -256,7 +229,7 @@ describe('Hog Executor', () => { }) }) - describe('async function responses', () => { + describe('async functions', () => { it('prevents large looped fetch calls', () => { const fn = createHogFunction({ ...HOG_EXAMPLES.recursive_fetch, @@ -264,27 +237,26 @@ describe('Hog Executor', () => { ...HOG_FILTERS_EXAMPLES.no_filters, }) - mockFunctionManager.getTeamHogFunctions.mockReturnValue([fn]) - // Simulate the recusive loop - const globals = createHogExecutionGlobals() - const results = executor - .findMatchingFunctions(createHogExecutionGlobals()) - .matchingFunctions.map((x) => executor.executeFunction(globals, x) as HogFunctionInvocationResult) - expect(results).toHaveLength(1) - - // Run the result one time simulating a successful fetch - const asyncResult1 = executor.executeAsyncResponse(results[0].invocation, createAsyncFunctionResponse()) - expect(asyncResult1.finished).toBe(false) - expect(asyncResult1.error).toBe(undefined) - expect(asyncResult1.asyncFunctionRequest).toBeDefined() - - // Run the result one more time simulating a second successful fetch - const asyncResult2 = executor.executeAsyncResponse(asyncResult1.invocation, createAsyncFunctionResponse()) + const invocation = createInvocation(fn) + + // Start the function + const result1 = executor.execute(invocation) + // Run the response one time simulating a successful fetch + result1.invocation.queue = 'hog' + result1.invocation.queueParameters = createAsyncFunctionResponse() + const result2 = executor.execute(result1.invocation) + expect(result2.finished).toBe(false) + expect(result2.error).toBe(undefined) + expect(result2.invocation.queue).toBe('fetch') + // This time we should see an error for hitting the loop limit - expect(asyncResult2.finished).toBe(false) - expect(asyncResult2.error).toEqual('Exceeded maximum number of async steps: 2') - expect(asyncResult2.logs.map((log) => log.message)).toEqual([ + result2.invocation.queue = 'hog' + result2.invocation.queueParameters = createAsyncFunctionResponse() + const result3 = executor.execute(result1.invocation) + expect(result3.finished).toBe(false) + expect(result3.error).toEqual('Exceeded maximum number of async steps: 2') + expect(result3.logs.map((log) => log.message)).toEqual([ 'Resuming function', 'Error executing function: HogVMException: Exceeded maximum number of async steps: 2', ]) @@ -305,14 +277,10 @@ describe('Hog Executor', () => { mockFunctionManager.getTeamHogFunctions.mockReturnValue([fn]) - const globals = createHogExecutionGlobals() - const results = executor - .findMatchingFunctions(createHogExecutionGlobals()) - .matchingFunctions.map((x) => executor.executeFunction(globals, x) as HogFunctionInvocationResult) - expect(results).toHaveLength(1) - expect(results[0].error).toContain('Execution timed out after 0.1 seconds. Performed ') + const result = executor.execute(createInvocation(fn)) + expect(result.error).toContain('Execution timed out after 0.1 seconds. Performed ') - expect(results[0].logs.map((log) => log.message)).toEqual([ + expect(result.logs.map((log) => log.message)).toEqual([ 'Executing function', 'I AM FIBONACCI', 'I AM FIBONACCI', @@ -339,8 +307,7 @@ describe('Hog Executor', () => { ...HOG_FILTERS_EXAMPLES.no_filters, }) - const globals = createHogExecutionGlobals() - const result = executor.executeFunction(globals, fn) + const result = executor.execute(createInvocation(fn)) expect(result?.capturedPostHogEvents).toEqual([ { distinct_id: 'distinct_id', @@ -368,7 +335,7 @@ describe('Hog Executor', () => { }, }, } as any) - const result = executor.executeFunction(globals, fn) + const result = executor.execute(createInvocation(fn, globals)) expect(result?.capturedPostHogEvents).toEqual([]) expect(result?.logs[1].message).toMatchInlineSnapshot( `"postHogCapture was called from an event that already executed this function. To prevent infinite loops, the event was not captured."` diff --git a/plugin-server/tests/cdp/hog-function-manager.test.ts b/plugin-server/tests/cdp/hog-function-manager.test.ts index ee4b9ded89f4e..267363c43ef65 100644 --- a/plugin-server/tests/cdp/hog-function-manager.test.ts +++ b/plugin-server/tests/cdp/hog-function-manager.test.ts @@ -94,7 +94,7 @@ describe('HogFunctionManager', () => { team_id: teamId1, name: 'Test Hog Function team 1', enabled: true, - bytecode: null, + bytecode: {}, filters: null, inputs_schema: [ { diff --git a/plugin-server/tests/cdp/hog-masker.test.ts b/plugin-server/tests/cdp/hog-masker.test.ts index 9a342572ea403..f117cff62b9ca 100644 --- a/plugin-server/tests/cdp/hog-masker.test.ts +++ b/plugin-server/tests/cdp/hog-masker.test.ts @@ -10,7 +10,7 @@ import { Hub } from '../../src/types' import { createHub } from '../../src/utils/db/hub' import { delay } from '../../src/utils/utils' import { HOG_MASK_EXAMPLES } from './examples' -import { createHogExecutionGlobals, createHogFunction } from './fixtures' +import { createHogExecutionGlobals, createHogFunction, createInvocation } from './fixtures' import { deleteKeysWithPrefix } from './helpers/redis' const mockNow: jest.Mock = require('../../src/utils/now').now as any @@ -52,7 +52,7 @@ describe('HogMasker', () => { it('should return all functions without masks', async () => { const normalFunction = createHogFunction({}) - const invocations = [{ globals: createHogExecutionGlobals(), hogFunction: normalFunction }] + const invocations = [createInvocation(normalFunction)] const res = await masker.filterByMasking(invocations) expect(res.notMasked).toHaveLength(1) @@ -63,21 +63,27 @@ describe('HogMasker', () => { const functionWithAllMasking = createHogFunction({ ...HOG_MASK_EXAMPLES.all, }) - const globals1 = createHogExecutionGlobals({ event: { uuid: '1' } as any }) - const globals2 = createHogExecutionGlobals({ event: { uuid: '2' } as any }) - const globals3 = createHogExecutionGlobals({ event: { uuid: '3' } as any }) - const invocations = [ - { globals: globals1, hogFunction: functionWithAllMasking }, - { globals: globals2, hogFunction: functionWithAllMasking }, - { globals: globals3, hogFunction: functionWithAllMasking }, - ] + + const invocation1 = createInvocation( + functionWithAllMasking, + createHogExecutionGlobals({ event: { uuid: '1' } as any }) + ) + const invocation2 = createInvocation( + functionWithAllMasking, + createHogExecutionGlobals({ event: { uuid: '2' } as any }) + ) + const invocation3 = createInvocation( + functionWithAllMasking, + createHogExecutionGlobals({ event: { uuid: '3' } as any }) + ) + const invocations = [invocation1, invocation2, invocation3] const res = await masker.filterByMasking(invocations) expect(res.notMasked).toHaveLength(1) expect(res.masked).toHaveLength(2) - expect(res.notMasked[0].globals).toEqual(globals1) - expect(res.masked[0].globals).toEqual(globals2) - expect(res.masked[1].globals).toEqual(globals3) + expect(res.notMasked[0].globals).toEqual(invocation1.globals) + expect(res.masked[0].globals).toEqual(invocation2.globals) + expect(res.masked[1].globals).toEqual(invocation3.globals) const res2 = await masker.filterByMasking(invocations) expect(res2.notMasked).toHaveLength(0) @@ -94,9 +100,9 @@ describe('HogMasker', () => { const functionWithNoMasking = createHogFunction({}) const globals = createHogExecutionGlobals() const invocations = [ - { globals, hogFunction: functionWithAllMasking }, - { globals, hogFunction: functionWithAllMasking2 }, - { globals, hogFunction: functionWithNoMasking }, + createInvocation(functionWithAllMasking, globals), + createInvocation(functionWithAllMasking2, globals), + createInvocation(functionWithNoMasking, globals), ] const res = await masker.filterByMasking(invocations) @@ -131,7 +137,7 @@ describe('HogMasker', () => { }) }) it('should re-allow after the ttl expires', async () => { - const invocations = [{ globals: createHogExecutionGlobals(), hogFunction: hogFunctionAll }] + const invocations = [createInvocation(hogFunctionAll)] expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(1) expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(0) expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(0) @@ -145,10 +151,10 @@ describe('HogMasker', () => { const globalsPerson2 = createHogExecutionGlobals({ person: { uuid: '2' } as any }) const invocations = [ - { globals: globalsPerson1, hogFunction: hogFunctionPerson }, - { globals: globalsPerson1, hogFunction: hogFunctionAll }, - { globals: globalsPerson2, hogFunction: hogFunctionPerson }, - { globals: globalsPerson2, hogFunction: hogFunctionAll }, + createInvocation(hogFunctionPerson, globalsPerson1), + createInvocation(hogFunctionAll, globalsPerson1), + createInvocation(hogFunctionPerson, globalsPerson2), + createInvocation(hogFunctionAll, globalsPerson2), ] const res = await masker.filterByMasking(invocations) expect(res.masked.length).toEqual(1) @@ -161,24 +167,24 @@ describe('HogMasker', () => { it('should mask until threshold passed', async () => { hogFunctionAll.masking!.threshold = 5 - const invocations = [{ globals: createHogExecutionGlobals(), hogFunction: hogFunctionAll }] + const invocation = createInvocation(hogFunctionAll) // First one goes through - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(1) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(1) // Next 4 should be masked - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(0) - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(0) - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(0) - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(0) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(0) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(0) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(0) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(0) // Now we have hit the threshold so it should not be masked - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(1) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(1) // Next 4 should be masked - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(0) - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(0) - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(0) - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(0) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(0) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(0) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(0) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(0) // Again the Nth one shouldn't be masked - expect((await masker.filterByMasking(invocations)).notMasked).toHaveLength(1) + expect((await masker.filterByMasking([invocation])).notMasked).toHaveLength(1) }) it('should mask threshold based in a batch', async () => { @@ -187,21 +193,11 @@ describe('HogMasker', () => { // If we have 10 invocations in a batch then we should have 2 invocations that are not masked expect( - ( - await masker.filterByMasking( - Array(10).fill({ globals: createHogExecutionGlobals(), hogFunction: hogFunctionAll }) - ) - ).notMasked + (await masker.filterByMasking(Array(10).fill(createInvocation(hogFunctionAll)))).notMasked ).toHaveLength(2) // Next one should cross the threshold - expect( - ( - await masker.filterByMasking([ - { globals: createHogExecutionGlobals(), hogFunction: hogFunctionAll }, - ]) - ).notMasked - ).toHaveLength(1) + expect((await masker.filterByMasking([createInvocation(hogFunctionAll)])).notMasked).toHaveLength(1) }) }) }) diff --git a/plugin-server/tests/cdp/hog-watcher.test.ts b/plugin-server/tests/cdp/hog-watcher.test.ts index ed5fdd1646717..a486d9006e738 100644 --- a/plugin-server/tests/cdp/hog-watcher.test.ts +++ b/plugin-server/tests/cdp/hog-watcher.test.ts @@ -9,6 +9,7 @@ import { HogFunctionInvocationResult } from '../../src/cdp/types' import { Hub } from '../../src/types' import { createHub } from '../../src/utils/db/hub' import { delay } from '../../src/utils/utils' +import { createInvocation } from './fixtures' import { deleteKeysWithPrefix } from './helpers/redis' const mockNow: jest.Mock = require('../../src/utils/now').now as any @@ -21,10 +22,9 @@ const createResult = (options: { }): HogFunctionInvocationResult => { return { invocation: { + ...createInvocation({ id: options.id }), id: 'invocation-id', teamId: 2, - hogFunctionId: options.id, - globals: {} as any, timings: [ { kind: 'async_function', diff --git a/plugin-server/tests/cdp/utils.test.ts b/plugin-server/tests/cdp/utils.test.ts index 9e1cf795e36b5..6640662b2e79e 100644 --- a/plugin-server/tests/cdp/utils.test.ts +++ b/plugin-server/tests/cdp/utils.test.ts @@ -2,7 +2,7 @@ import { DateTime } from 'luxon' import { HogFunctionInvocationResult } from '../../src/cdp/types' import { gzipObject, prepareLogEntriesForClickhouse, unGzipObject } from '../../src/cdp/utils' -import { insertHogFunction as _insertHogFunction } from './fixtures' +import { createHogFunction, createInvocation, insertHogFunction as _insertHogFunction } from './fixtures' describe('Utils', () => { describe('gzip compressions', () => { @@ -19,12 +19,8 @@ describe('Utils', () => { const startTime = DateTime.fromMillis(1620000000000) const example: HogFunctionInvocationResult = { invocation: { + ...createInvocation(createHogFunction({ id: 'hog-1' })), id: 'inv-1', - globals: {} as any, - teamId: 1, - hogFunctionId: 'hog-1', - vmState: undefined, - timings: [], }, finished: false, logs: [ From d34f0d998439cf19f4b68e6eec78edfc43010346 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 29 Aug 2024 16:35:28 +0200 Subject: [PATCH 04/75] Refactored tests --- plugin-server/src/cdp/cdp-api.ts | 43 +-- plugin-server/src/cdp/cdp-consumers.ts | 40 +-- plugin-server/src/cdp/utils.ts | 25 ++ ...backs-consumer.test.ts => cdp-api.test.ts} | 23 +- .../cdp/cdp-processed-events-consumer.test.ts | 291 +++++++++++------- .../tests/cdp/groups-manager.test.ts | 8 + 6 files changed, 253 insertions(+), 177 deletions(-) rename plugin-server/tests/cdp/{cdp-function-callbacks-consumer.test.ts => cdp-api.test.ts} (90%) diff --git a/plugin-server/src/cdp/cdp-api.ts b/plugin-server/src/cdp/cdp-api.ts index 943091af13814..40177bff4307a 100644 --- a/plugin-server/src/cdp/cdp-api.ts +++ b/plugin-server/src/cdp/cdp-api.ts @@ -8,7 +8,8 @@ import { AsyncFunctionExecutor } from './async-function-executor' import { HogExecutor } from './hog-executor' import { HogFunctionManager } from './hog-function-manager' import { HogWatcher, HogWatcherState } from './hog-watcher' -import { HogFunctionInvocation, HogFunctionInvocationAsyncRequest, HogFunctionType, LogEntry } from './types' +import { HogFunctionInvocationAsyncRequest, HogFunctionType, LogEntry } from './types' +import { createInvocation } from './utils' export class CdpApi { private hogExecutor: HogExecutor @@ -104,14 +105,6 @@ export class CdpApi { return } - const invocation: HogFunctionInvocation = { - id, - globals: globals, - teamId: team.id, - hogFunctionId: id, - timings: [], - } - // We use the provided config if given, otherwise the function's config // We use the provided config if given, otherwise the function's config const compoundConfiguration: HogFunctionType = { @@ -119,35 +112,45 @@ export class CdpApi { ...(configuration ?? {}), } - // TODO: Type the configuration better so we don't make mistakes here await this.hogFunctionManager.enrichWithIntegrations([compoundConfiguration]) - let response = this.hogExecutor.execute(compoundConfiguration, invocation) + const invocation = createInvocation( + { + ...globals, + project: { + id: team.id, + name: team.name, + url: `${this.hub.SITE_URL ?? 'http://localhost:8000'}/project/${team.id}`, + }, + }, + compoundConfiguration + ) + let response = this.hogExecutor.execute(invocation) const logs: LogEntry[] = [] - while (response.asyncFunctionRequest) { + while (!response.finished && response.invocation.queue === 'fetch') { invocation.vmState = response.invocation.vmState - const asyncFunctionRequest = response.asyncFunctionRequest + const fetchParams = response.invocation.queueParameters - if (mock_async_functions || asyncFunctionRequest.name !== 'fetch') { + if (mock_async_functions) { response.logs.push({ level: 'info', timestamp: DateTime.now(), - message: `Async function '${asyncFunctionRequest.name}' was mocked with arguments:`, + message: `Async function 'fetch' was mocked with arguments:`, }) response.logs.push({ level: 'info', timestamp: DateTime.now(), - message: `${asyncFunctionRequest.name}(${asyncFunctionRequest.args - .map((x) => JSON.stringify(x, null, 2)) - .join(', ')})`, + message: `fetch(${JSON.stringify(fetchParams, null, 2)})`, }) // Add the state, simulating what executeAsyncResponse would do - invocation.vmState!.stack.push({ status: 200, body: {} }) + invocation.queue = 'hog' + invocation.queueParameters = { response: { status: 200, body: {} } } } else { + // TODO const asyncInvocationRequest: HogFunctionInvocationAsyncRequest = { state: '', // WE don't care about the state for this level of testing teamId: team.id, @@ -169,7 +172,7 @@ export class CdpApi { } logs.push(...response.logs) - response = this.hogExecutor.execute(compoundConfiguration, invocation) + response = this.hogExecutor.execute(invocation) } logs.push(...response.logs) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 777f3145326ff..9425e36fab5d5 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -19,7 +19,7 @@ import { createKafkaProducerWrapper } from '../utils/db/hub' import { KafkaProducerWrapper } from '../utils/db/kafka-producer-wrapper' import { captureTeamEvent } from '../utils/posthog' import { status } from '../utils/status' -import { castTimestampOrNow, UUIDT } from '../utils/utils' +import { castTimestampOrNow } from '../utils/utils' import { RustyHook } from '../worker/rusty-hook' import { AsyncFunctionExecutor } from './async-function-executor' import { GroupsManager } from './groups-manager' @@ -40,6 +40,7 @@ import { import { convertToCaptureEvent, convertToHogFunctionInvocationGlobals, + createInvocation, gzipObject, prepareLogEntriesForClickhouse, unGzipObject, @@ -71,26 +72,6 @@ export interface TeamIDWithConfig { consoleLogIngestionEnabled: boolean } -function createInvocation(globals: HogFunctionInvocationGlobals, hogFunction: HogFunctionType): HogFunctionInvocation { - // Add the source of the trigger to the globals - const modifiedGlobals: HogFunctionInvocationGlobals = { - ...globals, - source: { - name: hogFunction.name ?? `Hog function: ${hogFunction.id}`, - url: `${globals.project.url}/pipeline/destinations/hog-${hogFunction.id}/configuration/`, - }, - } - - return { - id: new UUIDT().toString(), - globals: modifiedGlobals, - teamId: hogFunction.team_id, - hogFunction, - queue: 'hog', - timings: [], - } -} - abstract class CdpConsumerBase { batchConsumer?: BatchConsumer hogFunctionManager: HogFunctionManager @@ -339,9 +320,9 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { protected topic = KAFKA_EVENTS_JSON protected consumerGroupId = 'cdp-processed-events-consumer' - public async processBatch(invocationGlobals: HogFunctionInvocationGlobals[]): Promise { + public async processBatch(invocationGlobals: HogFunctionInvocationGlobals[]): Promise { if (!invocationGlobals.length) { - return + return [] } const invocationsToBeQueued = await this.runWithHeartbeat(() => @@ -349,6 +330,8 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { ) await this.queueInvocations(invocationsToBeQueued) await this.produceQueuedMessages() + + return invocationsToBeQueued } /** @@ -424,17 +407,6 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { }) return notMaskedInvocations - - // TODO: Option for routing to cyclotron instead of kafka - // TODO: Include "priority" in the job so that we can prioritize certain functions - // const results = ( - // await this.runManyWithHeartbeat(notMaskedInvocations, (item) => - // this.hogExecutor.executeFunction(item.globals, item.hogFunction) - // ) - // ).filter((x) => !!x) as HogFunctionInvocationResult[] - - // await this.hogWatcher.observeResults(results) - // return results }, }) } diff --git a/plugin-server/src/cdp/utils.ts b/plugin-server/src/cdp/utils.ts index f43c39739a66d..da1d64273f7aa 100644 --- a/plugin-server/src/cdp/utils.ts +++ b/plugin-server/src/cdp/utils.ts @@ -9,9 +9,11 @@ import { castTimestampOrNow, clickHouseTimestampToISO, UUIDT } from '../utils/ut import { HogFunctionCapturedEvent, HogFunctionFilterGlobals, + HogFunctionInvocation, HogFunctionInvocationGlobals, HogFunctionInvocationResult, HogFunctionLogEntrySerialized, + HogFunctionType, ParsedClickhouseEvent, } from './types' @@ -199,3 +201,26 @@ export const prepareLogEntriesForClickhouse = ( return preparedLogs } + +export function createInvocation( + globals: HogFunctionInvocationGlobals, + hogFunction: HogFunctionType +): HogFunctionInvocation { + // Add the source of the trigger to the globals + const modifiedGlobals: HogFunctionInvocationGlobals = { + ...globals, + source: { + name: hogFunction.name ?? `Hog function: ${hogFunction.id}`, + url: `${globals.project.url}/pipeline/destinations/hog-${hogFunction.id}/configuration/`, + }, + } + + return { + id: new UUIDT().toString(), + globals: modifiedGlobals, + teamId: hogFunction.team_id, + hogFunction, + queue: 'hog', + timings: [], + } +} diff --git a/plugin-server/tests/cdp/cdp-function-callbacks-consumer.test.ts b/plugin-server/tests/cdp/cdp-api.test.ts similarity index 90% rename from plugin-server/tests/cdp/cdp-function-callbacks-consumer.test.ts rename to plugin-server/tests/cdp/cdp-api.test.ts index 54f052fe439d4..3861dc7621a53 100644 --- a/plugin-server/tests/cdp/cdp-function-callbacks-consumer.test.ts +++ b/plugin-server/tests/cdp/cdp-api.test.ts @@ -3,7 +3,7 @@ import supertest from 'supertest' import { CdpApi } from '../../src/cdp/cdp-api' import { CdpFunctionCallbackConsumer } from '../../src/cdp/cdp-consumers' -import { HogFunctionType } from '../../src/cdp/types' +import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' import { Hub, Team } from '../../src/types' import { createHub } from '../../src/utils/db/hub' import { getFirstTeam, resetTestDatabase } from '../helpers/sql' @@ -64,7 +64,7 @@ const mockFetch: jest.Mock = require('../../src/utils/fetch').trackedFetch jest.setTimeout(1000) -describe('CDP Processed Events Consuner', () => { +describe('CDP API', () => { let processor: CdpFunctionCallbackConsumer let hub: Hub let closeHub: () => Promise @@ -103,10 +103,22 @@ describe('CDP Processed Events Consuner', () => { let app: express.Express let hogFunction: HogFunctionType - const globals = { + const globals: Partial = { + groups: {}, + person: { + uuid: '123', + name: 'Jane Doe', + url: 'https://example.com/person/123', + properties: { + email: 'example@posthog.com', + }, + }, event: { uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', name: '$pageview', + distinct_id: '123', + timestamp: '2021-09-28T14:00:00Z', + url: 'https://example.com/events/b3a1fe86-b10c-43cc-acaf-d208977608d0/2021-09-28T14:00:00Z', properties: { $lib_version: '1.0.0', }, @@ -151,6 +163,7 @@ describe('CDP Processed Events Consuner', () => { .send({ globals, mock_async_functions: true }) expect(res.status).toEqual(200) + console.log(res.body.logs[3].message) expect(res.body).toMatchObject({ status: 'success', error: 'undefined', @@ -161,7 +174,7 @@ describe('CDP Processed Events Consuner', () => { }, { level: 'debug', - message: "Suspending function due to async function call 'fetch'. Payload: 1140 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 1960 bytes", }, { level: 'info', @@ -169,7 +182,7 @@ describe('CDP Processed Events Consuner', () => { }, { level: 'info', - message: expect.stringContaining('fetch("https://example.com/posthog-webhook",'), + message: expect.stringContaining('fetch({'), }, { level: 'debug', diff --git a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts index 4de26bd05fca0..4bb3a36822008 100644 --- a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts +++ b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts @@ -1,10 +1,16 @@ import { CdpProcessedEventsConsumer } from '../../src/cdp/cdp-consumers' -import { HogFunctionType } from '../../src/cdp/types' +import { HogWatcherState } from '../../src/cdp/hog-watcher' +import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' import { Hub, Team } from '../../src/types' import { createHub } from '../../src/utils/db/hub' import { getFirstTeam, resetTestDatabase } from '../helpers/sql' import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' -import { createIncomingEvent, createMessage, insertHogFunction as _insertHogFunction } from './fixtures' +import { + createHogExecutionGlobals, + createIncomingEvent, + createMessage, + insertHogFunction as _insertHogFunction, +} from './fixtures' const mockConsumer = { on: jest.fn(), @@ -62,8 +68,6 @@ const mockProducer = require('../../src/utils/db/kafka-producer-wrapper').KafkaP jest.setTimeout(1000) -const noop = () => {} - const decodeKafkaMessage = (message: any): any => { return { ...message, @@ -71,7 +75,11 @@ const decodeKafkaMessage = (message: any): any => { } } -describe('CDP Processed Events Consuner', () => { +const decodeAllKafkaMessages = (): any[] => { + return mockProducer.produce.mock.calls.map((x) => decodeKafkaMessage(x[0])) +} + +describe('CDP Processed Events Consumer', () => { let processor: CdpProcessedEventsConsumer let hub: Hub let closeHub: () => Promise @@ -109,56 +117,161 @@ describe('CDP Processed Events Consuner', () => { /** * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios */ - it('can parse incoming messages correctly', async () => { - await insertHogFunction({ - ...HOG_EXAMPLES.simple_fetch, - ...HOG_INPUTS_EXAMPLES.simple_fetch, - ...HOG_FILTERS_EXAMPLES.no_filters, + + describe('common processing', () => { + let fnFetchNoFilters: HogFunctionType + let fnPrinterPageviewFilters: HogFunctionType + + let globals: HogFunctionInvocationGlobals + + beforeEach(async () => { + fnFetchNoFilters = await insertHogFunction({ + ...HOG_EXAMPLES.simple_fetch, + ...HOG_INPUTS_EXAMPLES.simple_fetch, + ...HOG_FILTERS_EXAMPLES.no_filters, + }) + + fnPrinterPageviewFilters = await insertHogFunction({ + ...HOG_EXAMPLES.input_printer, + ...HOG_INPUTS_EXAMPLES.secret_inputs, + ...HOG_FILTERS_EXAMPLES.pageview_or_autocapture_filter, + }) + + globals = createHogExecutionGlobals({ + project: { + id: team.id, + } as any, + event: { + uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', + name: '$pageview', + properties: { + $current_url: 'https://posthog.com', + $lib_version: '1.0.0', + }, + } as any, + }) }) - // Create a message that should be processed by this function - // Run the function and check that it was executed - await processor.handleEachBatch( - [ - createMessage( - createIncomingEvent(team.id, { - uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', - event: '$pageview', - properties: JSON.stringify({ - $lib_version: '1.0.0', - }), - }) - ), - ], - noop - ) - - expect(mockFetch).toHaveBeenCalledTimes(1) - expect(mockFetch.mock.calls[0]).toMatchInlineSnapshot(` - Array [ - "https://example.com/posthog-webhook", - Object { - "body": "{\\"event\\":{\\"uuid\\":\\"b3a1fe86-b10c-43cc-acaf-d208977608d0\\",\\"name\\":\\"$pageview\\",\\"distinct_id\\":\\"distinct_id_1\\",\\"properties\\":{\\"$lib_version\\":\\"1.0.0\\",\\"$elements_chain\\":\\"[]\\"},\\"timestamp\\":null,\\"url\\":\\"http://localhost:8000/project/2/events/b3a1fe86-b10c-43cc-acaf-d208977608d0/null\\"},\\"groups\\":{},\\"nested\\":{\\"foo\\":\\"http://localhost:8000/project/2/events/b3a1fe86-b10c-43cc-acaf-d208977608d0/null\\"},\\"person\\":null,\\"event_url\\":\\"http://localhost:8000/project/2/events/b3a1fe86-b10c-43cc-acaf-d208977608d0/null-test\\"}", - "headers": Object { - "version": "v=1.0.0", + + const matchInvocation = (hogFunction: HogFunctionType, globals: HogFunctionInvocationGlobals) => { + return { + hogFunction: { + id: hogFunction.id, }, - "method": "POST", - "timeout": 10000, - }, - ] - `) - }) + globals: { + event: globals.event, + }, + } + } + + it('should process events', async () => { + const invocations = await processor.processBatch([globals]) + + expect(invocations).toHaveLength(2) + expect(invocations).toMatchObject([ + matchInvocation(fnFetchNoFilters, globals), + matchInvocation(fnPrinterPageviewFilters, globals), + ]) - it('generates logs and metrics and produces them to kafka', async () => { - const hogFunction = await insertHogFunction({ - ...HOG_EXAMPLES.simple_fetch, - ...HOG_INPUTS_EXAMPLES.simple_fetch, - ...HOG_FILTERS_EXAMPLES.no_filters, + expect(mockProducer.produce).toHaveBeenCalledTimes(2) + + expect(decodeAllKafkaMessages()).toMatchObject([ + { + key: expect.any(String), + topic: 'cdp_function_callbacks_test', + value: { + state: expect.any(String), + }, + waitForAck: true, + }, + { + key: expect.any(String), + topic: 'cdp_function_callbacks_test', + value: { + state: expect.any(String), + }, + waitForAck: true, + }, + ]) }) - // Create a message that should be processed by this function - // Run the function and check that it was executed - await processor.handleEachBatch( - [ + it("should filter out functions that don't match the filter", async () => { + globals.event.properties.$current_url = 'https://nomatch.com' + + const invocations = await processor.processBatch([globals]) + + expect(invocations).toHaveLength(1) + expect(invocations).toMatchObject([matchInvocation(fnFetchNoFilters, globals)]) + expect(mockProducer.produce).toHaveBeenCalledTimes(2) + + expect(decodeAllKafkaMessages()).toMatchObject([ + { + key: expect.any(String), + topic: 'clickhouse_app_metrics2_test', + value: { + app_source: 'hog_function', + app_source_id: fnPrinterPageviewFilters.id, + count: 1, + metric_kind: 'other', + metric_name: 'filtered', + team_id: 2, + timestamp: expect.any(String), + }, + }, + { + topic: 'cdp_function_callbacks_test', + }, + ]) + }) + + it.each([ + [HogWatcherState.disabledForPeriod, 'disabled_temporarily'], + [HogWatcherState.disabledIndefinitely, 'disabled_permanently'], + ])('should filter out functions that are disabled', async (state, metric_name) => { + await processor.hogWatcher.forceStateChange(fnFetchNoFilters.id, state) + await processor.hogWatcher.forceStateChange(fnPrinterPageviewFilters.id, state) + + const invocations = await processor.processBatch([globals]) + + expect(invocations).toHaveLength(0) + expect(mockProducer.produce).toHaveBeenCalledTimes(2) + + expect(decodeAllKafkaMessages()).toMatchObject([ + { + topic: 'clickhouse_app_metrics2_test', + value: { + app_source: 'hog_function', + app_source_id: fnFetchNoFilters.id, + count: 1, + metric_kind: 'failure', + metric_name: metric_name, + team_id: 2, + }, + }, + { + topic: 'clickhouse_app_metrics2_test', + value: { + app_source: 'hog_function', + app_source_id: fnPrinterPageviewFilters.id, + count: 1, + metric_kind: 'failure', + metric_name: metric_name, + team_id: 2, + }, + }, + ]) + }) + }) + + describe('kafka parsing', () => { + it('can parse incoming messages correctly', async () => { + await insertHogFunction({ + ...HOG_EXAMPLES.simple_fetch, + ...HOG_INPUTS_EXAMPLES.simple_fetch, + ...HOG_FILTERS_EXAMPLES.no_filters, + }) + // Create a message that should be processed by this function + // Run the function and check that it was executed + await processor._handleKafkaBatch([ createMessage( createIncomingEvent(team.id, { uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', @@ -168,77 +281,19 @@ describe('CDP Processed Events Consuner', () => { }), }) ), - ], - noop - ) - - expect(mockFetch).toHaveBeenCalledTimes(1) - // Once for the async callback, twice for the logs, once for metrics - expect(mockProducer.produce).toHaveBeenCalledTimes(4) - - expect(decodeKafkaMessage(mockProducer.produce.mock.calls[0][0])).toEqual({ - key: expect.any(String), - topic: 'clickhouse_app_metrics2_test', - value: { - app_source: 'hog_function', - team_id: 2, - app_source_id: hogFunction.id, - metric_kind: 'success', - metric_name: 'succeeded', - count: 1, - timestamp: expect.any(String), - }, - waitForAck: true, - }) - - expect(decodeKafkaMessage(mockProducer.produce.mock.calls[1][0])).toEqual({ - key: expect.any(String), - topic: 'log_entries_test', - value: { - instance_id: expect.any(String), - level: 'debug', - log_source: 'hog_function', - log_source_id: expect.any(String), - message: 'Executing function', - team_id: 2, - timestamp: expect.any(String), - }, - - waitForAck: true, - }) - - expect(decodeKafkaMessage(mockProducer.produce.mock.calls[2][0])).toMatchObject({ - topic: 'log_entries_test', - value: { - log_source: 'hog_function', - message: "Suspending function due to async function call 'fetch'. Payload: 1497 bytes", - team_id: 2, - }, - }) + ]) - const msg = decodeKafkaMessage(mockProducer.produce.mock.calls[3][0]) - - expect(msg).toEqual({ - key: expect.any(String), - topic: 'cdp_function_callbacks_test', - value: { - state: expect.any(String), - hogFunctionId: hogFunction.id, - teamId: 2, - asyncFunctionResponse: { - response: { - status: 200, - body: { success: true }, + // Generall check that the message seemed to get processed + expect(decodeAllKafkaMessages()).toMatchObject([ + { + key: expect.any(String), + topic: 'cdp_function_callbacks_test', + value: { + state: expect.any(String), }, - timings: [ - { - kind: 'async_function', - duration_ms: expect.any(Number), - }, - ], + waitForAck: true, }, - }, - waitForAck: true, + ]) }) }) }) diff --git a/plugin-server/tests/cdp/groups-manager.test.ts b/plugin-server/tests/cdp/groups-manager.test.ts index eb3d718211ce1..f489d6b019045 100644 --- a/plugin-server/tests/cdp/groups-manager.test.ts +++ b/plugin-server/tests/cdp/groups-manager.test.ts @@ -50,6 +50,7 @@ describe('Groups Manager', () => { it('does nothing if no group properties found', async () => { const globals = createHogExecutionGlobals({ + groups: undefined, event: { properties: { $groups: { GroupA: 'id-1', GroupB: 'id-2' }, @@ -84,6 +85,7 @@ describe('Groups Manager', () => { { team_id: 1, group_type_index: 1, group_key: 'id-2', group_properties: { prop: 'value-2' } }, ] const globals = createHogExecutionGlobals({ + groups: undefined, event: { properties: { $groups: { GroupA: 'id-1', GroupB: 'id-2' }, @@ -125,14 +127,17 @@ describe('Groups Manager', () => { const items = [ // Should get both groups enriched createHogExecutionGlobals({ + groups: undefined, event: { properties: { $groups: { GroupA: 'id-1', GroupB: 'id-2' } } } as any, }), // Should get its group enriched (via reference) createHogExecutionGlobals({ + groups: undefined, event: { properties: { $groups: { GroupA: 'id-1' } } } as any, }), // Should get the right group for its team createHogExecutionGlobals({ + groups: undefined, project: { id: 2 } as any, event: { properties: { $groups: { GroupA: 'id-1' } } } as any, }), @@ -191,10 +196,12 @@ describe('Groups Manager', () => { it('cached group type queries', async () => { const globals = [ createHogExecutionGlobals({ + groups: undefined, project: { id: 1 } as any, event: { properties: { $groups: { GroupA: 'id-1', GroupB: 'id-2' } } } as any, }), createHogExecutionGlobals({ + groups: undefined, project: { id: 2 } as any, event: { properties: { $groups: { GroupA: 'id-1', GroupB: 'id-2' } } } as any, }), @@ -209,6 +216,7 @@ describe('Groups Manager', () => { globals.push( createHogExecutionGlobals({ + groups: undefined, project: { id: 3 } as any, event: { properties: { $groups: { GroupA: 'id-1', GroupB: 'id-2' } } } as any, }) From d2f923d4b035e5d5fd63e7884d5bdaac8892d662 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 29 Aug 2024 16:50:33 +0200 Subject: [PATCH 05/75] fixes --- plugin-server/tests/cdp/cdp-api.test.ts | 8 --- plugin-server/tests/cdp/fixtures.ts | 8 +-- plugin-server/tests/cdp/hog-executor.test.ts | 73 +++++++++++--------- 3 files changed, 40 insertions(+), 49 deletions(-) diff --git a/plugin-server/tests/cdp/cdp-api.test.ts b/plugin-server/tests/cdp/cdp-api.test.ts index aa3132f58d634..5b45946816fea 100644 --- a/plugin-server/tests/cdp/cdp-api.test.ts +++ b/plugin-server/tests/cdp/cdp-api.test.ts @@ -123,14 +123,6 @@ describe('CDP API', () => { $lib_version: '1.0.0', }, }, - groups: {}, - person: { - uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', - distinct_ids: ['b3a1fe86-b10c-43cc-acaf-d208977608d0'], - properties: { - email: 'test@posthog.com', - }, - }, } beforeEach(async () => { diff --git a/plugin-server/tests/cdp/fixtures.ts b/plugin-server/tests/cdp/fixtures.ts index ab588974e2e0c..52b8c20cf4b3d 100644 --- a/plugin-server/tests/cdp/fixtures.ts +++ b/plugin-server/tests/cdp/fixtures.ts @@ -108,19 +108,13 @@ export const createHogExecutionGlobals = ( ): HogFunctionInvocationGlobals => { return { groups: {}, - person: { - uuid: 'person-uuid', - name: 'person', - url: 'http://localhost:8000/persons/1', - properties: {}, - }, ...data, person: { uuid: 'uuid', name: 'test', url: 'http://localhost:8000/persons/1', properties: { - $lib_version: '1.2.3', + email: 'test@posthog.com', }, ...(data.person ?? {}), }, diff --git a/plugin-server/tests/cdp/hog-executor.test.ts b/plugin-server/tests/cdp/hog-executor.test.ts index 3e895d16f9588..ae7e609d02155 100644 --- a/plugin-server/tests/cdp/hog-executor.test.ts +++ b/plugin-server/tests/cdp/hog-executor.test.ts @@ -94,7 +94,7 @@ describe('Hog Executor', () => { { timestamp: expect.any(DateTime), level: 'debug', - message: "Suspending function due to async function call 'fetch'. Payload: 1740 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 1768 bytes", }, ]) }) @@ -130,27 +130,28 @@ describe('Hog Executor', () => { url: 'https://example.com/posthog-webhook', method: 'POST', headers: { version: 'v=1.2.3' }, - body: JSON.stringify({ - event: { - uuid: 'uuid', - name: 'test', - distinct_id: 'distinct_id', - url: 'http://localhost:8000/events/1', - properties: { $lib_version: '1.2.3' }, - timestamp: '2024-06-07T12:00:00.000Z', - }, - groups: {}, - nested: { foo: 'http://localhost:8000/events/1' }, - person: { - uuid: 'person-uuid', - name: 'person', - url: 'http://localhost:8000/persons/1', - properties: {}, - }, - event_url: 'http://localhost:8000/events/1-test', - }), }, }) + + expect(JSON.parse(result.invocation.queueParameters!.body)).toEqual({ + event: { + uuid: 'uuid', + name: 'test', + distinct_id: 'distinct_id', + url: 'http://localhost:8000/events/1', + properties: { $lib_version: '1.2.3' }, + timestamp: '2024-06-07T12:00:00.000Z', + }, + groups: {}, + nested: { foo: 'http://localhost:8000/events/1' }, + person: { + uuid: 'uuid', + name: 'test', + url: 'http://localhost:8000/persons/1', + properties: { email: 'test@posthog.com' }, + }, + event_url: 'http://localhost:8000/events/1-test', + }) }) it('executes the full function in a loop', () => { @@ -170,13 +171,15 @@ describe('Hog Executor', () => { expect(secondResult.finished).toBe(true) expect(secondResult.error).toBeUndefined() - expect(logs.map((log) => log.message)).toEqual([ - 'Executing function', - "Suspending function due to async function call 'fetch'. Payload: 1740 bytes", - 'Resuming function', - 'Fetch response:, {"status":200,"body":"success"}', - 'Function completed in 100ms. Sync: 0ms. Mem: 722 bytes. Ops: 22.', - ]) + expect(logs.map((log) => log.message)).toMatchInlineSnapshot(` + Array [ + "Executing function", + "Suspending function due to async function call 'fetch'. Payload: 1768 bytes", + "Resuming function", + "Fetch response:, {\\"status\\":200,\\"body\\":\\"success\\"}", + "Function completed in 100ms. Sync: 0ms. Mem: 750 bytes. Ops: 22.", + ] + `) }) it('parses the responses body if a string', () => { @@ -190,13 +193,15 @@ describe('Hog Executor', () => { const secondResult = executor.execute(result.invocation) logs.push(...secondResult.logs) - expect(logs.map((log) => log.message)).toEqual([ - 'Executing function', - "Suspending function due to async function call 'fetch'. Payload: 1740 bytes", - 'Resuming function', - 'Fetch response:, {"status":200,"body":{"foo":"bar"}}', - 'Function completed in 100ms. Sync: 0ms. Mem: 722 bytes. Ops: 22.', - ]) + expect(logs.map((log) => log.message)).toMatchInlineSnapshot(` + Array [ + "Executing function", + "Suspending function due to async function call 'fetch'. Payload: 1768 bytes", + "Resuming function", + "Fetch response:, {\\"status\\":200,\\"body\\":{\\"foo\\":\\"bar\\"}}", + "Function completed in 100ms. Sync: 0ms. Mem: 750 bytes. Ops: 22.", + ] + `) }) }) From 36578d7f91e436a1d3e388854fbcca156b51a52a Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 29 Aug 2024 18:11:40 +0200 Subject: [PATCH 06/75] Fixes all over --- .../src/cdp/async-function-executor.ts | 210 ---------------- plugin-server/src/cdp/cdp-api.ts | 120 +++++----- plugin-server/src/cdp/cdp-consumers.ts | 41 ++-- plugin-server/src/cdp/fetch-executor.ts | 143 +++++++++++ plugin-server/src/cdp/hog-executor.ts | 18 +- plugin-server/src/cdp/types.ts | 37 ++- plugin-server/src/main/pluginsServer.ts | 47 ++-- plugin-server/tests/cdp/cdp-api.test.ts | 2 +- .../tests/cdp/cdp-consumer.e2e.test.ts | 225 ++++++++++++++++++ 9 files changed, 508 insertions(+), 335 deletions(-) delete mode 100644 plugin-server/src/cdp/async-function-executor.ts create mode 100644 plugin-server/src/cdp/fetch-executor.ts create mode 100644 plugin-server/tests/cdp/cdp-consumer.e2e.test.ts diff --git a/plugin-server/src/cdp/async-function-executor.ts b/plugin-server/src/cdp/async-function-executor.ts deleted file mode 100644 index fe6df753dc723..0000000000000 --- a/plugin-server/src/cdp/async-function-executor.ts +++ /dev/null @@ -1,210 +0,0 @@ -import cyclotron from '@posthog/cyclotron' -import { Histogram } from 'prom-client' - -import { buildIntegerMatcher } from '../config/config' -import { PluginsServerConfig, ValueMatcher } from '../types' -import { trackedFetch } from '../utils/fetch' -import { status } from '../utils/status' -import { RustyHook } from '../worker/rusty-hook' -import { HogFunctionInvocationAsyncRequest, HogFunctionInvocationAsyncResponse } from './types' - -export const BUCKETS_KB_WRITTEN = [0, 128, 512, 1024, 2024, 4096, 10240, Infinity] - -const histogramFetchPayloadSize = new Histogram({ - name: 'cdp_async_function_fetch_payload_size_kb', - help: 'The size in kb of the batches we are receiving from Kafka', - buckets: BUCKETS_KB_WRITTEN, -}) - -const histogramHogHooksPayloadSize = new Histogram({ - name: 'cdp_async_function_hoghooks_payload_size_kb', - help: 'The size in kb of the batches we are receiving from Kafka', - buckets: BUCKETS_KB_WRITTEN, -}) - -export type AsyncFunctionExecutorOptions = { - sync?: boolean -} - -export class AsyncFunctionExecutor { - hogHookEnabledForTeams: ValueMatcher - cyclotronEnabledForTeams: ValueMatcher - - constructor(private serverConfig: PluginsServerConfig, private rustyHook: RustyHook) { - this.hogHookEnabledForTeams = buildIntegerMatcher(serverConfig.CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS, true) - this.cyclotronEnabledForTeams = buildIntegerMatcher(serverConfig.CDP_ASYNC_FUNCTIONS_CYCLOTRON_TEAMS, true) - } - - async execute( - request: HogFunctionInvocationAsyncRequest, - options: AsyncFunctionExecutorOptions = { sync: false } - ): Promise { - if (!request.asyncFunctionRequest) { - throw new Error('No async function request provided') - } - - const loggingContext = { - hogFunctionId: request.hogFunctionId, - asyncFunctionName: request.asyncFunctionRequest.name, - } - status.info('🦔', `[AsyncFunctionExecutor] Executing async function`, loggingContext) - - switch (request.asyncFunctionRequest.name) { - // TODO: Add error case here - if we don't get a valid queued message then we should log something against the function - case 'fetch': - return await this.asyncFunctionFetch(request, options) - default: - status.error( - '🦔', - `[HogExecutor] Unknown async function: ${request.asyncFunctionRequest.name}`, - loggingContext - ) - } - } - - private async asyncFunctionFetch( - request: HogFunctionInvocationAsyncRequest, - options?: AsyncFunctionExecutorOptions - ): Promise { - if (!request.asyncFunctionRequest) { - return - } - - const asyncFunctionResponse: HogFunctionInvocationAsyncResponse['asyncFunctionResponse'] = { - response: null, - timings: [], - } - - try { - // Sanitize the args - const [url, fetchOptions] = request.asyncFunctionRequest.args as [ - string | undefined, - Record | undefined - ] - - if (typeof url !== 'string') { - status.error('🦔', `[HogExecutor] Invalid URL`, { ...request, url }) - return - } - - const method = fetchOptions?.method || 'POST' - const headers = fetchOptions?.headers || { - 'Content-Type': 'application/json', - } - let body = fetchOptions?.body - // Modify the body to ensure it is a string (we allow Hog to send an object to keep things simple) - body = body ? (typeof body === 'string' ? body : JSON.stringify(body)) : body - - // Finally overwrite the args with the sanitized ones - request.asyncFunctionRequest.args = [url, { method, headers, body }] - - if (body) { - histogramFetchPayloadSize.observe(body.length / 1024) - } - - // If the caller hasn't forced it to be synchronous and the team has the cyclotron or - // rustyhook enabled, enqueue it in one of those services. - if (!options?.sync && this.cyclotronEnabledForTeams(request.teamId)) { - try { - await cyclotron.createJob({ - teamId: request.teamId, - functionId: request.hogFunctionId, - queueName: 'fetch', - // TODO: The async function compression changes happen upstream of this - // function. I guess we'll want to unwind that change because we actually - // want the `vmState` (and the rest of state) so we can put it into PG here. - vmState: '', - parameters: JSON.stringify({ - return_queue: 'hog', - url, - method, - headers, - // The body is passed in the `blob` field below. - }), - metadata: JSON.stringify({}), - // Fetch bodies are passed in the binary blob column/field. - blob: toUint8Array(body), - }) - } catch (e) { - status.error( - '🦔', - `[HogExecutor] Cyclotron failed to enqueue async fetch function, sending directly instead`, - { - error: e, - } - ) - } - } else if (!options?.sync && this.hogHookEnabledForTeams(request.teamId)) { - const hoghooksPayload = JSON.stringify(request) - - histogramHogHooksPayloadSize.observe(hoghooksPayload.length / 1024) - - const enqueued = await this.rustyHook.enqueueForHog(JSON.stringify(request)) - if (enqueued) { - return - } - } - - status.info('🦔', `[HogExecutor] Webhook not sent via rustyhook, sending directly instead`) - - const start = performance.now() - const fetchResponse = await trackedFetch(url, { - method, - body, - headers, - timeout: this.serverConfig.EXTERNAL_REQUEST_TIMEOUT_MS, - }) - - let responseBody = await fetchResponse.text() - try { - responseBody = JSON.parse(responseBody) - } catch (err) { - // Ignore - } - - const duration = performance.now() - start - - asyncFunctionResponse.timings!.push({ - kind: 'async_function', - duration_ms: duration, - }) - - asyncFunctionResponse.response = { - status: fetchResponse.status, - body: responseBody, - } - } catch (err) { - status.error('🦔', `[HogExecutor] Error during fetch`, { error: String(err) }) - asyncFunctionResponse.error = 'Something went wrong with the fetch request.' - } - - const response: HogFunctionInvocationAsyncResponse = { - state: request.state, - teamId: request.teamId, - hogFunctionId: request.hogFunctionId, - asyncFunctionResponse, - } - - return response - } -} - -function toUint8Array(data: any): Uint8Array | undefined { - if (data === null || data === undefined) { - return undefined - } - - if (data instanceof Uint8Array) { - return data - } - - if (data instanceof ArrayBuffer) { - return new Uint8Array(data) - } - - if (typeof data === 'string') { - return new TextEncoder().encode(data) - } - - return new TextEncoder().encode(JSON.stringify(data)) -} diff --git a/plugin-server/src/cdp/cdp-api.ts b/plugin-server/src/cdp/cdp-api.ts index 40177bff4307a..34de05942471e 100644 --- a/plugin-server/src/cdp/cdp-api.ts +++ b/plugin-server/src/cdp/cdp-api.ts @@ -4,17 +4,17 @@ import { DateTime } from 'luxon' import { Hub } from '../types' import { status } from '../utils/status' import { delay } from '../utils/utils' -import { AsyncFunctionExecutor } from './async-function-executor' +import { FetchExecutor } from './fetch-executor' import { HogExecutor } from './hog-executor' import { HogFunctionManager } from './hog-function-manager' import { HogWatcher, HogWatcherState } from './hog-watcher' -import { HogFunctionInvocationAsyncRequest, HogFunctionType, LogEntry } from './types' +import { HogFunctionInvocationResult, HogFunctionType, LogEntry } from './types' import { createInvocation } from './utils' export class CdpApi { private hogExecutor: HogExecutor private hogFunctionManager: HogFunctionManager - private asyncFunctionExecutor: AsyncFunctionExecutor + private fetchExecutor: FetchExecutor private hogWatcher: HogWatcher constructor( @@ -22,13 +22,13 @@ export class CdpApi { dependencies: { hogExecutor: HogExecutor hogFunctionManager: HogFunctionManager - asyncFunctionExecutor: AsyncFunctionExecutor + fetchExecutor: FetchExecutor hogWatcher: HogWatcher } ) { this.hogExecutor = dependencies.hogExecutor this.hogFunctionManager = dependencies.hogFunctionManager - this.asyncFunctionExecutor = dependencies.asyncFunctionExecutor + this.fetchExecutor = dependencies.fetchExecutor this.hogWatcher = dependencies.hogWatcher } @@ -114,72 +114,70 @@ export class CdpApi { await this.hogFunctionManager.enrichWithIntegrations([compoundConfiguration]) - const invocation = createInvocation( - { - ...globals, - project: { - id: team.id, - name: team.name, - url: `${this.hub.SITE_URL ?? 'http://localhost:8000'}/project/${team.id}`, - }, - }, - compoundConfiguration - ) - let response = this.hogExecutor.execute(invocation) + let lastResponse: HogFunctionInvocationResult | null = null const logs: LogEntry[] = [] - while (!response.finished && response.invocation.queue === 'fetch') { - invocation.vmState = response.invocation.vmState + let count = 0 - const fetchParams = response.invocation.queueParameters - - if (mock_async_functions) { - response.logs.push({ - level: 'info', - timestamp: DateTime.now(), - message: `Async function 'fetch' was mocked with arguments:`, - }) - - response.logs.push({ - level: 'info', - timestamp: DateTime.now(), - message: `fetch(${JSON.stringify(fetchParams, null, 2)})`, - }) - - // Add the state, simulating what executeAsyncResponse would do - invocation.queue = 'hog' - invocation.queueParameters = { response: { status: 200, body: {} } } - } else { - // TODO - const asyncInvocationRequest: HogFunctionInvocationAsyncRequest = { - state: '', // WE don't care about the state for this level of testing - teamId: team.id, - hogFunctionId: hogFunction.id, - asyncFunctionRequest, - } - const asyncRes = await this.asyncFunctionExecutor!.execute(asyncInvocationRequest, { - sync: true, - }) - - if (!asyncRes || asyncRes.asyncFunctionResponse.error) { - response.logs.push({ - level: 'error', - timestamp: DateTime.now(), - message: 'Failed to execute async function', - }) + while (!lastResponse || !lastResponse.finished) { + if (count > 5) { + throw new Error('Too many iterations') + } + count += 1 + + let response: HogFunctionInvocationResult + + const invocation = + lastResponse?.invocation || + createInvocation( + { + ...globals, + project: { + id: team.id, + name: team.name, + url: `${this.hub.SITE_URL ?? 'http://localhost:8000'}/project/${team.id}`, + }, + }, + compoundConfiguration + ) + + if (invocation.queue === 'fetch') { + if (mock_async_functions) { + // Add the state, simulating what executeAsyncResponse would do + response = { + invocation: { + ...invocation, + queue: 'hog', + queueParameters: { response: { status: 200, body: {} } }, + }, + finished: false, + logs: [ + { + level: 'info', + timestamp: DateTime.now(), + message: `Async function 'fetch' was mocked with arguments:`, + }, + { + level: 'info', + timestamp: DateTime.now(), + message: `fetch(${JSON.stringify(invocation.queueParameters, null, 2)})`, + }, + ], + } + } else { + response = await this.fetchExecutor!.executeLocally(invocation) } - invocation.vmState!.stack.push(asyncRes?.asyncFunctionResponse.response ?? null) + } else { + response = this.hogExecutor.execute(invocation) } logs.push(...response.logs) - response = this.hogExecutor.execute(invocation) + lastResponse = response } - logs.push(...response.logs) - res.json({ - status: response.finished ? 'success' : 'error', - error: String(response.error), + status: lastResponse.finished ? 'success' : 'error', + error: String(lastResponse.error), logs: logs, }) } catch (e) { diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 9425e36fab5d5..6f9435e93d1e9 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -21,7 +21,7 @@ import { captureTeamEvent } from '../utils/posthog' import { status } from '../utils/status' import { castTimestampOrNow } from '../utils/utils' import { RustyHook } from '../worker/rusty-hook' -import { AsyncFunctionExecutor } from './async-function-executor' +import { FetchExecutor } from './fetch-executor' import { GroupsManager } from './groups-manager' import { HogExecutor } from './hog-executor' import { HogFunctionManager } from './hog-function-manager' @@ -75,7 +75,7 @@ export interface TeamIDWithConfig { abstract class CdpConsumerBase { batchConsumer?: BatchConsumer hogFunctionManager: HogFunctionManager - asyncFunctionExecutor: AsyncFunctionExecutor + fetchExecutor: FetchExecutor hogExecutor: HogExecutor hogWatcher: HogWatcher hogMasker: HogMasker @@ -100,7 +100,7 @@ abstract class CdpConsumerBase { this.hogMasker = new HogMasker(this.redis) this.hogExecutor = new HogExecutor(this.hogFunctionManager) const rustyHook = this.hub?.rustyHook ?? new RustyHook(this.hub) - this.asyncFunctionExecutor = new AsyncFunctionExecutor(this.hub, rustyHook) + this.fetchExecutor = new FetchExecutor(this.hub, rustyHook) this.groupsManager = new GroupsManager(this.hub) } @@ -287,6 +287,8 @@ abstract class CdpConsumerBase { this.kafkaProducer = await createKafkaProducerWrapper(this.hub) this.kafkaProducer.producer.connect() + + await this.startKafkaConsumer() } public async stop(): Promise { @@ -352,7 +354,6 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { // Find all functions that could need running invocationGlobals.forEach((globals) => { - // TODO: Move that out of finding to somewhere else const { matchingFunctions, nonMatchingFunctions } = this.hogExecutor.findMatchingFunctions(globals) possibleInvocations.push( @@ -466,39 +467,40 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { if (!invocations.length) { return } - const invocationResults = await this.runWithHeartbeat(() => this.processInvocations(invocations)) - await this.processInvocationResults(invocationResults) - } - protected async processInvocations(invocations: HogFunctionInvocation[]): Promise { - // These are either new invocations or responses - // The key thing we need to consider is taking the response, adding it to the invocation state and then continuing - return await runInstrumentedFunction({ + const invocationResults = await runInstrumentedFunction({ statsKey: `cdpConsumer.handleEachBatch.executeInvocations`, func: async () => { // TODO: Handle if the invocation step is not "hog" so we should do fetch instead... const results = await this.runManyWithHeartbeat(invocations, (item) => this.hogExecutor.execute(item)) - await this.hogWatcher.observeResults(results) return results }, }) + + await this.hogWatcher.observeResults(invocationResults) + await this.processInvocationResults(invocationResults) + await this.produceQueuedMessages() } protected async processInvocationResults(results: HogFunctionInvocationResult[]): Promise { await runInstrumentedFunction({ statsKey: `cdpConsumer.handleEachBatch.produceResults`, func: async () => { + console.log('Processing invocations results', results.length) + await Promise.all( results.map(async (result) => { // Tricky: We want to pull all the logs out as we don't want them to be passed around to any subsequent functions - this.produceAppMetric({ - team_id: result.invocation.teamId, - app_source_id: result.invocation.hogFunction.id, - metric_kind: result.error ? 'failure' : 'success', - metric_name: result.error ? 'failed' : 'succeeded', - count: 1, - }) + if (result.finished || result.error) { + this.produceAppMetric({ + team_id: result.invocation.teamId, + app_source_id: result.invocation.hogFunction.id, + metric_kind: result.error ? 'failure' : 'success', + metric_name: result.error ? 'failed' : 'succeeded', + count: 1, + }) + } this.produceLogs(result) @@ -557,6 +559,7 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { // If it looks like a try { const invocation = await unGzipObject(item.state) + if ('asyncFunctionResponse' in item) { // This means it is a callback from hoghooks so we need to add the response to the invocation invocation.queue = 'hog' diff --git a/plugin-server/src/cdp/fetch-executor.ts b/plugin-server/src/cdp/fetch-executor.ts new file mode 100644 index 0000000000000..b2e99ef0a1836 --- /dev/null +++ b/plugin-server/src/cdp/fetch-executor.ts @@ -0,0 +1,143 @@ +import { Histogram } from 'prom-client' + +import { buildIntegerMatcher } from '../config/config' +import { PluginsServerConfig, ValueMatcher } from '../types' +import { trackedFetch } from '../utils/fetch' +import { status } from '../utils/status' +import { RustyHook } from '../worker/rusty-hook' +import { + HogFunctionInvocation, + HogFunctionInvocationAsyncRequest, + HogFunctionInvocationResult, + HogFunctionQueueParametersFetchRequest, + HogFunctionQueueParametersFetchResponse, +} from './types' +import { gzipObject } from './utils' + +export const BUCKETS_KB_WRITTEN = [0, 128, 512, 1024, 2024, 4096, 10240, Infinity] + +const histogramFetchPayloadSize = new Histogram({ + name: 'cdp_async_function_fetch_payload_size_kb', + help: 'The size in kb of the batches we are receiving from Kafka', + buckets: BUCKETS_KB_WRITTEN, +}) + +const histogramHogHooksPayloadSize = new Histogram({ + name: 'cdp_async_function_hoghooks_payload_size_kb', + help: 'The size in kb of the batches we are receiving from Kafka', + buckets: BUCKETS_KB_WRITTEN, +}) + +/** + * This class is only used by the kafka based queuing system. For the Cyclotron system there is no need for this. + */ +export class FetchExecutor { + hogHookEnabledForTeams: ValueMatcher + + constructor(private serverConfig: PluginsServerConfig, private rustyHook: RustyHook) { + this.hogHookEnabledForTeams = buildIntegerMatcher(serverConfig.CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS, true) + } + + async execute(invocation: HogFunctionInvocation): Promise { + if (invocation.queue !== 'fetch' || !invocation.queueParameters) { + throw new Error('Bad invocation') + } + + const params = invocation.queueParameters as HogFunctionQueueParametersFetchRequest + if (params.body) { + histogramFetchPayloadSize.observe(params.body.length / 1024) + } + + try { + if (this.hogHookEnabledForTeams(invocation.teamId)) { + // This is very temporary until we are commited to Cyclotron + const payload: HogFunctionInvocationAsyncRequest = { + state: await gzipObject(invocation), + teamId: invocation.teamId, + hogFunctionId: invocation.hogFunction.id, + asyncFunctionRequest: { + name: 'fetch', + args: [ + params.url, + { + ...params, + }, + ], + }, + } + const hoghooksPayload = JSON.stringify(payload) + histogramHogHooksPayloadSize.observe(hoghooksPayload.length / 1024) + const enqueued = await this.rustyHook.enqueueForHog(hoghooksPayload) + if (enqueued) { + // We return nothing here hoghooks will take care of that + return + } + } + + status.info('🦔', `[HogExecutor] Webhook not sent via rustyhook, sending directly instead`) + } catch (err) { + status.error('🦔', `[HogExecutor] Error during fetch`, { error: String(err) }) + } + + return await this.executeLocally(invocation) + } + + async executeLocally(invocation: HogFunctionInvocation): Promise { + if (invocation.queue !== 'fetch' || !invocation.queueParameters) { + throw new Error('Bad invocation') + } + + const params = invocation.queueParameters as HogFunctionQueueParametersFetchRequest + + const resParams: HogFunctionQueueParametersFetchResponse = { + response: { + status: 0, + body: {}, + }, + error: null, + timings: [], + } + + try { + const start = performance.now() + const fetchResponse = await trackedFetch(params.url, { + method: params.method, + body: params.body, + headers: params.headers, + timeout: this.serverConfig.EXTERNAL_REQUEST_TIMEOUT_MS, + }) + + let responseBody = await fetchResponse.text() + try { + responseBody = JSON.parse(responseBody) + } catch (err) { + // Ignore + } + + const duration = performance.now() - start + + resParams.timings!.push({ + kind: 'async_function', + duration_ms: duration, + }) + + resParams.response = { + status: fetchResponse.status, + body: responseBody, + } + } catch (err) { + status.error('🦔', `[HogExecutor] Error during fetch`, { error: String(err) }) + resParams.error = 'Something went wrong with the fetch request.' + } + + return { + invocation: { + ...invocation, + queue: 'hog', + queueParameters: resParams, + }, + finished: false, + logs: [], + } + } +} diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index e43d33c253b6f..c70406370bb93 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -9,6 +9,7 @@ import { HogFunctionInvocationGlobals, HogFunctionInvocationGlobalsWithInputs, HogFunctionInvocationResult, + HogFunctionQueueParametersFetchResponse, HogFunctionType, } from './types' import { convertToHogFunctionFilterGlobal } from './utils' @@ -146,8 +147,18 @@ export class HogExecutor { try { // If the queueParameter is set then we have an expected format that we want to parse and add to the stack + console.log('EXEC', invocation.queue, invocation.queueParameters) if (invocation.queueParameters) { - const { logs = [], response = null, error, timings = [] } = invocation.queueParameters + const { + logs = [], + response = null, + error, + timings = [], + } = invocation.queueParameters as HogFunctionQueueParametersFetchResponse + + // Reset the queue parameters to be sure + invocation.queue = 'hog' + invocation.queueParameters = undefined // Special handling for fetch // TODO: Would be good to have a dedicated value in the fetch response for the status code @@ -176,14 +187,11 @@ export class HogExecutor { } } + console.log('Addding to vmstate!', response) // Add the response to the stack to continue execution invocation.vmState!.stack.push(response) invocation.timings.push(...timings) result.logs = [...logs, ...result.logs] - - // Reset the queue parameters to be sure - invocation.queue = 'hog' - invocation.queueParameters = undefined } const start = performance.now() diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index 99b0ca6d5d607..bf81426021ff5 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -153,13 +153,36 @@ export interface HogFunctionTiming { duration_ms: number } +export type HogFunctionQueueParametersFetchRequest = { + url: string + method: string + body: string + headers: Record +} + +export type HogFunctionQueueParametersFetchResponse = { + /** An error message to indicate something went wrong and the invocation should be stopped */ + error?: any + /** The data to be passed to the Hog function from the response */ + response?: { + status: number + body: any + } | null + timings?: HogFunctionTiming[] + logs?: LogEntry[] +} + +export type HogFunctionInvocationQueueParameters = + | HogFunctionQueueParametersFetchRequest + | HogFunctionQueueParametersFetchResponse + export type HogFunctionInvocation = { id: string globals: HogFunctionInvocationGlobals teamId: Team['id'] hogFunction: HogFunctionType queue: 'hog' | 'fetch' - queueParameters?: Record + queueParameters?: HogFunctionInvocationQueueParameters // The current vmstate (set if the invocation is paused) vmState?: VMState timings: HogFunctionTiming[] @@ -170,18 +193,6 @@ export type HogFunctionAsyncFunctionRequest = { args: any[] } -export type HogFunctionAsyncFunctionResponse = { - /** An error message to indicate something went wrong and the invocation should be stopped */ - error?: any - /** The data to be passed to the Hog function from the response */ - response?: { - status: number - body: any - } | null - timings?: HogFunctionTiming[] - logs?: LogEntry[] -} - // The result of an execution export type HogFunctionInvocationResult = { invocation: HogFunctionInvocation diff --git a/plugin-server/src/main/pluginsServer.ts b/plugin-server/src/main/pluginsServer.ts index 6f26f480451db..3915738b2178e 100644 --- a/plugin-server/src/main/pluginsServer.ts +++ b/plugin-server/src/main/pluginsServer.ts @@ -11,12 +11,7 @@ import v8Profiler from 'v8-profiler-next' import { getPluginServerCapabilities } from '../capabilities' import { CdpApi } from '../cdp/cdp-api' -import { - CdpCyclotronWorker, - CdpFunctionCallbackConsumer, - CdpOverflowConsumer, - CdpProcessedEventsConsumer, -} from '../cdp/cdp-consumers' +import { CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../cdp/cdp-consumers' import { defaultConfig, sessionRecordingConsumerConfig } from '../config/config' import { Hub, PluginServerCapabilities, PluginsServerConfig } from '../types' import { createHub, createKafkaClient, createKafkaProducerWrapper } from '../utils/db/hub' @@ -525,26 +520,26 @@ export async function startPluginsServer( } } - if (capabilities.cdpFunctionOverflow) { - ;[hub, closeHub] = hub ? [hub, closeHub] : await createHub(serverConfig, capabilities) - const consumer = new CdpOverflowConsumer(hub) - await consumer.start() - - shutdownOnConsumerExit(consumer.batchConsumer!) - shutdownCallbacks.push(async () => await consumer.stop()) - healthChecks['cdp-overflow'] = () => consumer.isHealthy() ?? false - } - - if (capabilities.cdpCyclotronWorker) { - ;[hub, closeHub] = hub ? [hub, closeHub] : await createHub(serverConfig, capabilities) - if (hub.CYCLOTRON_DATABASE_URL) { - const worker = new CdpCyclotronWorker(hub) - await worker.start() - } else { - // This is a temporary solution until we *require* Cyclotron to be configured. - status.warn('💥', 'CYCLOTRON_DATABASE_URL is not set, not running Cyclotron worker') - } - } + // if (capabilities.cdpFunctionOverflow) { + // ;[hub, closeHub] = hub ? [hub, closeHub] : await createHub(serverConfig, capabilities) + // const consumer = new CdpOverflowConsumer(hub) + // await consumer.start() + + // shutdownOnConsumerExit(consumer.batchConsumer!) + // shutdownCallbacks.push(async () => await consumer.stop()) + // healthChecks['cdp-overflow'] = () => consumer.isHealthy() ?? false + // } + + // if (capabilities.cdpCyclotronWorker) { + // ;[hub, closeHub] = hub ? [hub, closeHub] : await createHub(serverConfig, capabilities) + // if (hub.CYCLOTRON_DATABASE_URL) { + // const worker = new CdpCyclotronWorker(hub) + // await worker.start() + // } else { + // // This is a temporary solution until we *require* Cyclotron to be configured. + // status.warn('💥', 'CYCLOTRON_DATABASE_URL is not set, not running Cyclotron worker') + // } + // } if (capabilities.http) { const app = setupCommonRoutes(healthChecks, serverInstance?.queue ?? undefined) diff --git a/plugin-server/tests/cdp/cdp-api.test.ts b/plugin-server/tests/cdp/cdp-api.test.ts index 5b45946816fea..33511badcc062 100644 --- a/plugin-server/tests/cdp/cdp-api.test.ts +++ b/plugin-server/tests/cdp/cdp-api.test.ts @@ -222,7 +222,7 @@ describe('CDP API', () => { }, { level: 'debug', - message: "Suspending function due to async function call 'fetch'. Payload: 1689 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 1960 bytes", }, { level: 'debug', diff --git a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts new file mode 100644 index 0000000000000..b38cefc741130 --- /dev/null +++ b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts @@ -0,0 +1,225 @@ +import { CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../../src/cdp/cdp-consumers' +import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' +import { Hub, Team } from '../../src/types' +import { createHub } from '../../src/utils/db/hub' +import { getFirstTeam, resetTestDatabase } from '../helpers/sql' +import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' +import { createHogExecutionGlobals, insertHogFunction as _insertHogFunction } from './fixtures' + +const mockConsumer = { + on: jest.fn(), + commitSync: jest.fn(), + commit: jest.fn(), + queryWatermarkOffsets: jest.fn(), + committed: jest.fn(), + assignments: jest.fn(), + isConnected: jest.fn(() => true), + getMetadata: jest.fn(), +} + +jest.mock('../../src/kafka/batch-consumer', () => { + return { + startBatchConsumer: jest.fn(() => + Promise.resolve({ + join: () => ({ + finally: jest.fn(), + }), + stop: jest.fn(), + consumer: mockConsumer, + }) + ), + } +}) + +jest.mock('../../src/utils/fetch', () => { + return { + trackedFetch: jest.fn(() => + Promise.resolve({ + status: 200, + text: () => Promise.resolve(JSON.stringify({ success: true })), + json: () => Promise.resolve({ success: true }), + }) + ), + } +}) + +jest.mock('../../src/utils/db/kafka-producer-wrapper', () => { + const mockKafkaProducer = { + producer: { + connect: jest.fn(), + }, + disconnect: jest.fn(), + produce: jest.fn(() => Promise.resolve()), + } + return { + KafkaProducerWrapper: jest.fn(() => mockKafkaProducer), + } +}) + +const mockFetch: jest.Mock = require('../../src/utils/fetch').trackedFetch + +const mockProducer = require('../../src/utils/db/kafka-producer-wrapper').KafkaProducerWrapper() + +jest.setTimeout(1000) + +const decodeKafkaMessage = (message: any): any => { + return { + ...message, + value: JSON.parse(message.value.toString()), + } +} + +const decodeAllKafkaMessages = (): any[] => { + return mockProducer.produce.mock.calls.map((x) => decodeKafkaMessage(x[0])) +} + +const convertToKafkaMessage = (message: any): any => { + return { + ...message, + value: Buffer.from(JSON.stringify(message.value)), + } +} + +/** + * NOTE: This isn't fully e2e... We still mock kafka but we trigger one queue from the other in a loop + */ +describe('CDP Consumers E2E', () => { + let processedEventsConsumer: CdpProcessedEventsConsumer + let functionProcessor: CdpFunctionCallbackConsumer + let hub: Hub + let closeHub: () => Promise + let team: Team + + const insertHogFunction = async (hogFunction: Partial) => { + const item = await _insertHogFunction(hub.postgres, team.id, hogFunction) + // Trigger the reload that django would do + await processedEventsConsumer.hogFunctionManager.reloadAllHogFunctions() + await functionProcessor.hogFunctionManager.reloadAllHogFunctions() + return item + } + + beforeEach(async () => { + await resetTestDatabase() + ;[hub, closeHub] = await createHub() + team = await getFirstTeam(hub) + + processedEventsConsumer = new CdpProcessedEventsConsumer(hub) + await processedEventsConsumer.start() + functionProcessor = new CdpFunctionCallbackConsumer(hub) + await functionProcessor.start() + + mockFetch.mockClear() + }) + + afterEach(async () => { + jest.setTimeout(10000) + await processedEventsConsumer.stop() + await functionProcessor.stop() + await closeHub() + }) + + afterAll(() => { + jest.useRealTimers() + }) + + describe('e2e fetch function', () => { + /** + * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios + */ + + let fnFetchNoFilters: HogFunctionType + let globals: HogFunctionInvocationGlobals + + let kafkaMessages = { + metrics: [] as any[], + logs: [] as any[], + invocations: [] as any[], + } + + beforeEach(async () => { + fnFetchNoFilters = await insertHogFunction({ + ...HOG_EXAMPLES.simple_fetch, + ...HOG_INPUTS_EXAMPLES.simple_fetch, + ...HOG_FILTERS_EXAMPLES.no_filters, + }) + + globals = createHogExecutionGlobals({ + project: { + id: team.id, + } as any, + event: { + uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', + name: '$pageview', + properties: { + $current_url: 'https://posthog.com', + $lib_version: '1.0.0', + }, + } as any, + }) + + kafkaMessages = { + metrics: [], + logs: [], + invocations: [], + } + }) + + const gatherProducedMessages = () => { + const allMessages = decodeAllKafkaMessages() + + allMessages.forEach((message) => { + if (message.topic === 'clickhouse_app_metrics2_test') { + kafkaMessages.metrics.push(message) + } else if (message.topic === 'log_entries_test') { + kafkaMessages.logs.push(message) + } else if (message.topic === 'cdp_function_callbacks_test') { + kafkaMessages.invocations.push(message) + } else { + throw new Error(`Unknown topic: ${message.topic}`) + } + }) + + mockProducer.produce.mockClear() + } + + it('should invoke a function via kafka transportation until completed', async () => { + // NOTE: We can skip kafka as the entry point + const invocations = await processedEventsConsumer.processBatch([globals]) + expect(invocations).toHaveLength(1) + gatherProducedMessages() + + expect(kafkaMessages.invocations).toHaveLength(1) + expect(kafkaMessages.invocations[0].topic).toEqual('cdp_function_callbacks_test') + mockProducer.produce.mockClear() + + while (kafkaMessages.invocations.length) { + await functionProcessor._handleKafkaBatch([convertToKafkaMessage(kafkaMessages.invocations[0])]) + kafkaMessages.invocations = [] + gatherProducedMessages() + } + + expect(kafkaMessages.metrics).toMatchObject([ + { + key: fnFetchNoFilters.id.toString(), + value: { + app_source: 'hog_function', + app_source_id: fnFetchNoFilters.id.toString(), + count: 1, + metric_kind: 'success', + metric_name: 'succeeded', + team_id: 2, + }, + }, + ]) + expect(kafkaMessages.logs.map((x) => x.value.message)).toMatchInlineSnapshot(` + Array [ + "Executing function", + "Suspending function due to async function call 'fetch'. Payload: 1852 bytes", + "Resuming function", + "Fetch response:, null", + "Function completed in 1.1098760068416595ms. Sync: 0ms. Mem: 834 bytes. Ops: 22.", + ] + `) + }) + }) +}) From 6782daa4f3a203f9a9b5df06e45fcd527c02627c Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 29 Aug 2024 18:15:58 +0200 Subject: [PATCH 07/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 14 ++++++++++---- plugin-server/src/cdp/hog-executor.ts | 2 -- plugin-server/src/cdp/types.ts | 10 +++------- plugin-server/tests/cdp/cdp-consumer.e2e.test.ts | 16 +++++++--------- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 6f9435e93d1e9..d635628cd0411 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -30,12 +30,12 @@ import { HogWatcher, HogWatcherState } from './hog-watcher' import { CdpRedis, createCdpRedisPool } from './redis' import { HogFunctionInvocation, - HogFunctionInvocationAsyncResponse, HogFunctionInvocationGlobals, HogFunctionInvocationResult, HogFunctionInvocationSerialized, HogFunctionMessageToProduce, HogFunctionType, + HogHooksFetchResponse, } from './types' import { convertToCaptureEvent, @@ -473,8 +473,14 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { func: async () => { // TODO: Handle if the invocation step is not "hog" so we should do fetch instead... - const results = await this.runManyWithHeartbeat(invocations, (item) => this.hogExecutor.execute(item)) - return results + const fetchQueue = invocations.filter((item) => item.queue === 'fetch') + const fetchResults = await this.runManyWithHeartbeat(fetchQueue, (item) => + this.fetchExecutor.execute(item) + ) + + const hogQueue = invocations.filter((item) => item.queue === 'hog') + const hogResults = await this.runManyWithHeartbeat(hogQueue, (item) => this.hogExecutor.execute(item)) + return [...hogResults, ...(fetchResults.filter(Boolean) as HogFunctionInvocationResult[])] }, }) @@ -541,7 +547,7 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { const invocations: HogFunctionInvocation[] = [] // Parse the base message value - const entries: (HogFunctionInvocationAsyncResponse | HogFunctionInvocationSerialized)[] = messages + const entries: (HogHooksFetchResponse | HogFunctionInvocationSerialized)[] = messages .map((message) => { try { return JSON.parse(message.value!.toString()) diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index c70406370bb93..7ea9a24676263 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -147,7 +147,6 @@ export class HogExecutor { try { // If the queueParameter is set then we have an expected format that we want to parse and add to the stack - console.log('EXEC', invocation.queue, invocation.queueParameters) if (invocation.queueParameters) { const { logs = [], @@ -187,7 +186,6 @@ export class HogExecutor { } } - console.log('Addding to vmstate!', response) // Add the response to the stack to continue execution invocation.vmState!.stack.push(response) invocation.timings.push(...timings) diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index bf81426021ff5..0e3e281c3bf13 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -210,11 +210,11 @@ export type HogFunctionInvocationAsyncRequest = { asyncFunctionRequest?: HogFunctionAsyncFunctionRequest } -export type HogFunctionInvocationAsyncResponse = { +export type HogHooksFetchResponse = { state: string // Serialized HogFunctionInvocation teamId: number hogFunctionId: HogFunctionType['id'] - asyncFunctionResponse: HogFunctionAsyncFunctionResponse + asyncFunctionResponse: HogFunctionQueueParametersFetchResponse } export type HogFunctionInvocationSerialized = { @@ -271,11 +271,7 @@ export type IntegrationType = { export type HogFunctionMessageToProduce = { topic: string - value: - | HogFunctionLogEntrySerialized - | HogFunctionInvocationAsyncResponse - | AppMetric2Type - | HogFunctionInvocationSerialized + value: HogFunctionLogEntrySerialized | HogHooksFetchResponse | AppMetric2Type | HogFunctionInvocationSerialized key: string } diff --git a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts index b38cefc741130..90c25bf7e7284 100644 --- a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts @@ -211,15 +211,13 @@ describe('CDP Consumers E2E', () => { }, }, ]) - expect(kafkaMessages.logs.map((x) => x.value.message)).toMatchInlineSnapshot(` - Array [ - "Executing function", - "Suspending function due to async function call 'fetch'. Payload: 1852 bytes", - "Resuming function", - "Fetch response:, null", - "Function completed in 1.1098760068416595ms. Sync: 0ms. Mem: 834 bytes. Ops: 22.", - ] - `) + expect(kafkaMessages.logs.map((x) => x.value.message)).toEqual([ + 'Executing function', + "Suspending function due to async function call 'fetch'. Payload: 1852 bytes", + 'Resuming function', + 'Fetch response:, {"status":200,"body":{"success":true}}', + expect.stringContaining('Function completed'), + ]) }) }) }) From c1d971052d9e9cdc5dc155421551c06c96003335 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 29 Aug 2024 18:18:32 +0200 Subject: [PATCH 08/75] fixes --- frontend/src/scenes/userLogic.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/frontend/src/scenes/userLogic.ts b/frontend/src/scenes/userLogic.ts index a15f205a46064..9db1e96fa8806 100644 --- a/frontend/src/scenes/userLogic.ts +++ b/frontend/src/scenes/userLogic.ts @@ -131,7 +131,6 @@ export const userLogic = kea([ posthog.register({ is_demo_project: user.team?.is_demo, - is_impersonated: user.is_impersonated, }) if (user.team) { From 75472d7e254dc639f783acf2baf45d668e6c061c Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 29 Aug 2024 18:20:03 +0200 Subject: [PATCH 09/75] Removed overflow --- plugin-server/src/capabilities.ts | 6 ------ plugin-server/src/main/pluginsServer.ts | 10 ---------- plugin-server/src/types.ts | 2 -- plugin-server/tests/server.test.ts | 1 - 4 files changed, 19 deletions(-) diff --git a/plugin-server/src/capabilities.ts b/plugin-server/src/capabilities.ts index 7b8c8461b78ad..6a9d30af15ff4 100644 --- a/plugin-server/src/capabilities.ts +++ b/plugin-server/src/capabilities.ts @@ -25,7 +25,6 @@ export function getPluginServerCapabilities(config: PluginsServerConfig): Plugin preflightSchedules: true, cdpProcessedEvents: true, cdpFunctionCallbacks: true, - cdpFunctionOverflow: true, cdpCyclotronWorker: true, syncInlinePlugins: true, ...sharedCapabilities, @@ -104,11 +103,6 @@ export function getPluginServerCapabilities(config: PluginsServerConfig): Plugin cdpFunctionCallbacks: true, ...sharedCapabilities, } - case PluginServerMode.cdp_function_overflow: - return { - cdpFunctionOverflow: true, - ...sharedCapabilities, - } case PluginServerMode.cdp_cyclotron_worker: return { cdpCyclotronWorker: true, diff --git a/plugin-server/src/main/pluginsServer.ts b/plugin-server/src/main/pluginsServer.ts index 3915738b2178e..97f18c4ea1def 100644 --- a/plugin-server/src/main/pluginsServer.ts +++ b/plugin-server/src/main/pluginsServer.ts @@ -520,16 +520,6 @@ export async function startPluginsServer( } } - // if (capabilities.cdpFunctionOverflow) { - // ;[hub, closeHub] = hub ? [hub, closeHub] : await createHub(serverConfig, capabilities) - // const consumer = new CdpOverflowConsumer(hub) - // await consumer.start() - - // shutdownOnConsumerExit(consumer.batchConsumer!) - // shutdownCallbacks.push(async () => await consumer.stop()) - // healthChecks['cdp-overflow'] = () => consumer.isHealthy() ?? false - // } - // if (capabilities.cdpCyclotronWorker) { // ;[hub, closeHub] = hub ? [hub, closeHub] : await createHub(serverConfig, capabilities) // if (hub.CYCLOTRON_DATABASE_URL) { diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index 1d596f034d81e..a776f698498ad 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -84,7 +84,6 @@ export enum PluginServerMode { recordings_blob_ingestion_overflow = 'recordings-blob-ingestion-overflow', cdp_processed_events = 'cdp-processed-events', cdp_function_callbacks = 'cdp-function-callbacks', - cdp_function_overflow = 'cdp-function-overflow', cdp_cyclotron_worker = 'cdp-cyclotron-worker', functional_tests = 'functional-tests', } @@ -348,7 +347,6 @@ export interface PluginServerCapabilities { sessionRecordingBlobOverflowIngestion?: boolean cdpProcessedEvents?: boolean cdpFunctionCallbacks?: boolean - cdpFunctionOverflow?: boolean cdpCyclotronWorker?: boolean appManagementSingleton?: boolean preflightSchedules?: boolean // Used for instance health checks on hobby deploy, not useful on cloud diff --git a/plugin-server/tests/server.test.ts b/plugin-server/tests/server.test.ts index 009416547b36d..3b8f7d58dda29 100644 --- a/plugin-server/tests/server.test.ts +++ b/plugin-server/tests/server.test.ts @@ -96,7 +96,6 @@ describe('server', () => { processAsyncWebhooksHandlers: true, cdpProcessedEvents: true, cdpFunctionCallbacks: true, - cdpFunctionOverflow: true, cdpCyclotronWorker: true, syncInlinePlugins: true, } From 3861150ffb263513a76d65029cd821248b7f1280 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 29 Aug 2024 18:21:40 +0200 Subject: [PATCH 10/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index d635628cd0411..cfb4484c4e646 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -207,10 +207,8 @@ abstract class CdpConsumerBase { // TODO: Add cylcotron check here // For now we just enqueue to kafka - // For kafka style this is overkill to enqueue this way but it simplifies migrating to the new system - // TODO: Convert to the right format for a job const request: HogFunctionInvocationSerialized = { state: await gzipObject(invocation), } @@ -471,8 +469,8 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { const invocationResults = await runInstrumentedFunction({ statsKey: `cdpConsumer.handleEachBatch.executeInvocations`, func: async () => { - // TODO: Handle if the invocation step is not "hog" so we should do fetch instead... - + // NOTE: In the future this service will never do fetching (unless we decide we want to do it in node at some point) + // This is just "for now" to support the transition to cyclotron const fetchQueue = invocations.filter((item) => item.queue === 'fetch') const fetchResults = await this.runManyWithHeartbeat(fetchQueue, (item) => this.fetchExecutor.execute(item) From 43803c4bb31381bc2002efb1fa95ee40d7271b0b Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 29 Aug 2024 18:25:19 +0200 Subject: [PATCH 11/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index cfb4484c4e646..e8ff93236383f 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -203,9 +203,7 @@ abstract class CdpConsumerBase { } protected async queueInvocation(invocation: HogFunctionInvocation) { - // Depending on flags we enqueue either to kafka or to cyclotron - - // TODO: Add cylcotron check here + // TODO: Add cylcotron check here and enqueue that way // For now we just enqueue to kafka // For kafka style this is overkill to enqueue this way but it simplifies migrating to the new system @@ -588,29 +586,23 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { // // TODO: Split out non-Kafka specific parts of CdpConsumerBase so that it can be used by the // // Cyclotron worker below. Or maybe we can just wait, and rip the Kafka bits out once Cyclotron is -// // shipped (and rename it something other than consomer, probably). For now, this is an easy way to +// // shipped (and rename it something other than consumer, probably). For now, this is an easy way to // // use existing code and get an end-to-end demo shipped. -// export class CdpCyclotronWorker extends CdpConsumerBase { +// export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { // protected name = 'CdpCyclotronWorker' // protected topic = 'UNUSED-CdpCyclotronWorker' // protected consumerGroupId = 'UNUSED-CdpCyclotronWorker' // private runningWorker: Promise | undefined // private isUnhealthy = false -// public async _handleEachBatch(_: Message[]): Promise { -// // Not called, we override `start` below to use Cyclotron instead. -// } - // private async innerStart() { // try { // const limit = 100 // TODO: Make configurable. // while (!this.isStopping) { // const jobs = await cyclotron.dequeueJobsWithVmState('hog', limit) -// for (const job of jobs) { -// // TODO: Reassemble a HogFunctionInvocationAsyncResponse (or whatever proper type) -// // from the fields on the job, and then execute the next Hog step. -// console.log(job.id) -// } +// // TODO: Decode jobs into the right types + +// await this.processBatch(jobs) // } // } catch (err) { // this.isUnhealthy = true From 805ed9c14405b7c7fa68a0b9bb30310cba1c627e Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 2 Sep 2024 10:07:14 +0200 Subject: [PATCH 12/75] Fixes --- plugin-server/tests/cdp/cdp-consumer.e2e.test.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts index 90c25bf7e7284..8d6581aef9ef0 100644 --- a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts @@ -1,7 +1,7 @@ import { CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../../src/cdp/cdp-consumers' import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' import { Hub, Team } from '../../src/types' -import { createHub } from '../../src/utils/db/hub' +import { closeHub, createHub } from '../../src/utils/db/hub' import { getFirstTeam, resetTestDatabase } from '../helpers/sql' import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' import { createHogExecutionGlobals, insertHogFunction as _insertHogFunction } from './fixtures' @@ -87,7 +87,6 @@ describe('CDP Consumers E2E', () => { let processedEventsConsumer: CdpProcessedEventsConsumer let functionProcessor: CdpFunctionCallbackConsumer let hub: Hub - let closeHub: () => Promise let team: Team const insertHogFunction = async (hogFunction: Partial) => { @@ -100,7 +99,7 @@ describe('CDP Consumers E2E', () => { beforeEach(async () => { await resetTestDatabase() - ;[hub, closeHub] = await createHub() + hub = await createHub() team = await getFirstTeam(hub) processedEventsConsumer = new CdpProcessedEventsConsumer(hub) @@ -115,7 +114,7 @@ describe('CDP Consumers E2E', () => { jest.setTimeout(10000) await processedEventsConsumer.stop() await functionProcessor.stop() - await closeHub() + await closeHub(hub) }) afterAll(() => { @@ -213,7 +212,7 @@ describe('CDP Consumers E2E', () => { ]) expect(kafkaMessages.logs.map((x) => x.value.message)).toEqual([ 'Executing function', - "Suspending function due to async function call 'fetch'. Payload: 1852 bytes", + "Suspending function due to async function call 'fetch'. Payload: 1902 bytes", 'Resuming function', 'Fetch response:, {"status":200,"body":{"success":true}}', expect.stringContaining('Function completed'), From 6e329e79e759d1d477bbb156e75d70d1c2154ef9 Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 2 Sep 2024 10:29:25 +0200 Subject: [PATCH 13/75] fix --- plugin-server/src/cdp/cdp-consumers.ts | 47 ++++++++++++++++++++++---- plugin-server/src/cdp/types.ts | 13 +++++-- 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 9e1b9be51844f..35a050c60c36c 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -33,6 +33,7 @@ import { HogFunctionInvocationGlobals, HogFunctionInvocationResult, HogFunctionInvocationSerialized, + HogFunctionInvocationSerializedCompressed, HogFunctionMessageToProduce, HogFunctionType, HogHooksFetchResponse, @@ -216,8 +217,15 @@ abstract class CdpConsumerBase { // For now we just enqueue to kafka // For kafka style this is overkill to enqueue this way but it simplifies migrating to the new system - const request: HogFunctionInvocationSerialized = { - state: await gzipObject(invocation), + const serializedInvocation: HogFunctionInvocationSerialized = { + ...invocation, + hogFunctionId: invocation.hogFunction.id, + } + + delete (serializedInvocation as any).hogFunction + + const request: HogFunctionInvocationSerializedCompressed = { + state: await gzipObject(serializedInvocation), } // NOTE: This is very temporary as it is producing the response. the response will actually be produced by the 3rd party service @@ -552,7 +560,7 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { const invocations: HogFunctionInvocation[] = [] // Parse the base message value - const entries: (HogHooksFetchResponse | HogFunctionInvocationSerialized)[] = messages + const entries: (HogHooksFetchResponse | HogFunctionInvocationSerializedCompressed)[] = messages .map((message) => { try { return JSON.parse(message.value!.toString()) @@ -567,15 +575,34 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { // Deserialize the compressed data await Promise.all( entries.map(async (item) => { - // If it looks like a try { - const invocation = await unGzipObject(item.state) + const invocationSerialized = await unGzipObject( + item.state + ) if ('asyncFunctionResponse' in item) { // This means it is a callback from hoghooks so we need to add the response to the invocation - invocation.queue = 'hog' - invocation.queueParameters = item.asyncFunctionResponse + invocationSerialized.queue = 'hog' + invocationSerialized.queueParameters = item.asyncFunctionResponse + } + + const hogFunction = this.hogFunctionManager.getHogFunction( + invocationSerialized.hogFunctionId + ) + if (!hogFunction) { + status.error('Error finding hog function', { + id: invocationSerialized.hogFunctionId, + }) + return } + + const invocation: HogFunctionInvocation = { + ...invocationSerialized, + hogFunction, + } + + delete (invocation as any).hogFunctionId + invocations.push(invocation) } catch (e) { status.error('Error unzipping message', e, item.state) @@ -584,6 +611,12 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { }) ) + invocations.forEach((item) => { + if (!item.hogFunction?.id) { + console.error('No hog function id', item) + } + }) + return invocations }, }) diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index 0e3e281c3bf13..19740db81306a 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -217,7 +217,12 @@ export type HogHooksFetchResponse = { asyncFunctionResponse: HogFunctionQueueParametersFetchResponse } -export type HogFunctionInvocationSerialized = { +export type HogFunctionInvocationSerialized = Omit & { + // When serialized to kafka / cyclotron we only store the ID + hogFunctionId: HogFunctionType['id'] +} + +export type HogFunctionInvocationSerializedCompressed = { state: string // Serialized HogFunctionInvocation } @@ -271,7 +276,11 @@ export type IntegrationType = { export type HogFunctionMessageToProduce = { topic: string - value: HogFunctionLogEntrySerialized | HogHooksFetchResponse | AppMetric2Type | HogFunctionInvocationSerialized + value: + | HogFunctionLogEntrySerialized + | HogHooksFetchResponse + | AppMetric2Type + | HogFunctionInvocationSerializedCompressed key: string } From db73a6177e290c15d4079fbfdec9b2cd38dff80f Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 2 Sep 2024 12:01:48 +0200 Subject: [PATCH 14/75] Fixes --- plugin-server/tests/cdp/cdp-api.test.ts | 4 ++-- plugin-server/tests/cdp/cdp-consumer.e2e.test.ts | 7 ++++--- plugin-server/tests/cdp/hog-executor.test.ts | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/plugin-server/tests/cdp/cdp-api.test.ts b/plugin-server/tests/cdp/cdp-api.test.ts index 33511badcc062..7d2cc5f63cc49 100644 --- a/plugin-server/tests/cdp/cdp-api.test.ts +++ b/plugin-server/tests/cdp/cdp-api.test.ts @@ -174,7 +174,7 @@ describe('CDP API', () => { }, { level: 'debug', - message: "Suspending function due to async function call 'fetch'. Payload: 1960 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 2010 bytes", }, { level: 'info', @@ -222,7 +222,7 @@ describe('CDP API', () => { }, { level: 'debug', - message: "Suspending function due to async function call 'fetch'. Payload: 1960 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 2010 bytes", }, { level: 'debug', diff --git a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts index 8d6581aef9ef0..98cb2e041b042 100644 --- a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts @@ -1,7 +1,7 @@ import { CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../../src/cdp/cdp-consumers' import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' import { Hub, Team } from '../../src/types' -import { closeHub, createHub } from '../../src/utils/db/hub' +import { createHub } from '../../src/utils/db/hub' import { getFirstTeam, resetTestDatabase } from '../helpers/sql' import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' import { createHogExecutionGlobals, insertHogFunction as _insertHogFunction } from './fixtures' @@ -87,6 +87,7 @@ describe('CDP Consumers E2E', () => { let processedEventsConsumer: CdpProcessedEventsConsumer let functionProcessor: CdpFunctionCallbackConsumer let hub: Hub + let closeHub: () => Promise let team: Team const insertHogFunction = async (hogFunction: Partial) => { @@ -99,7 +100,7 @@ describe('CDP Consumers E2E', () => { beforeEach(async () => { await resetTestDatabase() - hub = await createHub() + ;[hub, closeHub] = await createHub() team = await getFirstTeam(hub) processedEventsConsumer = new CdpProcessedEventsConsumer(hub) @@ -114,7 +115,7 @@ describe('CDP Consumers E2E', () => { jest.setTimeout(10000) await processedEventsConsumer.stop() await functionProcessor.stop() - await closeHub(hub) + await closeHub() }) afterAll(() => { diff --git a/plugin-server/tests/cdp/hog-executor.test.ts b/plugin-server/tests/cdp/hog-executor.test.ts index ae7e609d02155..dc6350e0bb3d2 100644 --- a/plugin-server/tests/cdp/hog-executor.test.ts +++ b/plugin-server/tests/cdp/hog-executor.test.ts @@ -94,7 +94,7 @@ describe('Hog Executor', () => { { timestamp: expect.any(DateTime), level: 'debug', - message: "Suspending function due to async function call 'fetch'. Payload: 1768 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 1818 bytes", }, ]) }) @@ -174,7 +174,7 @@ describe('Hog Executor', () => { expect(logs.map((log) => log.message)).toMatchInlineSnapshot(` Array [ "Executing function", - "Suspending function due to async function call 'fetch'. Payload: 1768 bytes", + "Suspending function due to async function call 'fetch'. Payload: 1818 bytes", "Resuming function", "Fetch response:, {\\"status\\":200,\\"body\\":\\"success\\"}", "Function completed in 100ms. Sync: 0ms. Mem: 750 bytes. Ops: 22.", @@ -196,7 +196,7 @@ describe('Hog Executor', () => { expect(logs.map((log) => log.message)).toMatchInlineSnapshot(` Array [ "Executing function", - "Suspending function due to async function call 'fetch'. Payload: 1768 bytes", + "Suspending function due to async function call 'fetch'. Payload: 1818 bytes", "Resuming function", "Fetch response:, {\\"status\\":200,\\"body\\":{\\"foo\\":\\"bar\\"}}", "Function completed in 100ms. Sync: 0ms. Mem: 750 bytes. Ops: 22.", From 6d55fb1cdd5e141cbd0ab67649b891b54e4d97b1 Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 2 Sep 2024 12:23:05 +0200 Subject: [PATCH 15/75] Fix up look sharp --- plugin-server/src/cdp/cdp-consumers.ts | 116 ++++++++++-------- plugin-server/src/config/config.ts | 1 + plugin-server/src/types.ts | 1 + .../cdp/cdp-processed-events-consumer.test.ts | 64 +++++++++- 4 files changed, 130 insertions(+), 52 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 48824e09d3650..764f7e0842e7e 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -228,6 +228,53 @@ abstract class CdpConsumerBase { }) } + protected async processInvocationResults(results: HogFunctionInvocationResult[]): Promise { + await runInstrumentedFunction({ + statsKey: `cdpConsumer.handleEachBatch.produceResults`, + func: async () => { + console.log('Processing invocations results', results.length) + + await Promise.all( + results.map(async (result) => { + // Tricky: We want to pull all the logs out as we don't want them to be passed around to any subsequent functions + if (result.finished || result.error) { + this.produceAppMetric({ + team_id: result.invocation.teamId, + app_source_id: result.invocation.hogFunction.id, + metric_kind: result.error ? 'failure' : 'success', + metric_name: result.error ? 'failed' : 'succeeded', + count: 1, + }) + } + + this.produceLogs(result) + + // PostHog capture events + const capturedEvents = result.capturedPostHogEvents + delete result.capturedPostHogEvents + + for (const event of capturedEvents ?? []) { + const team = await this.hub.teamManager.fetchTeam(event.team_id) + if (!team) { + continue + } + this.messagesToProduce.push({ + topic: KAFKA_EVENTS_PLUGIN_INGESTION, + value: convertToCaptureEvent(event, team), + key: `${team!.api_token}:${event.distinct_id}`, + }) + } + + if (!result.finished) { + // If it isn't finished then we need to put it back on the queue + await this.queueInvocation(result.invocation) + } + }) + ) + }, + }) + } + protected async startKafkaConsumer() { this.batchConsumer = await startBatchConsumer({ connectionConfig: createRdConnectionConfigFromEnvVars(this.hub), @@ -334,7 +381,27 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { const invocationsToBeQueued = await this.runWithHeartbeat(() => this.createHogFunctionInvocations(invocationGlobals) ) - await this.queueInvocations(invocationsToBeQueued) + + if (this.hub.CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP) { + // NOTE: This is for testing the two ways of enqueueing processing. It will be swapped out for a cyclotron env check + // Kafka based workflow + const invocationResults = await runInstrumentedFunction({ + statsKey: `cdpConsumer.handleEachBatch.executeInvocations`, + func: async () => { + const hogResults = await this.runManyWithHeartbeat(invocationsToBeQueued, (item) => + this.hogExecutor.execute(item) + ) + return [...hogResults] + }, + }) + + await this.hogWatcher.observeResults(invocationResults) + await this.processInvocationResults(invocationResults) + } else { + await this.queueInvocations(invocationsToBeQueued) + await this.produceQueuedMessages() + } + await this.produceQueuedMessages() return invocationsToBeQueued @@ -493,53 +560,6 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { await this.produceQueuedMessages() } - protected async processInvocationResults(results: HogFunctionInvocationResult[]): Promise { - await runInstrumentedFunction({ - statsKey: `cdpConsumer.handleEachBatch.produceResults`, - func: async () => { - console.log('Processing invocations results', results.length) - - await Promise.all( - results.map(async (result) => { - // Tricky: We want to pull all the logs out as we don't want them to be passed around to any subsequent functions - if (result.finished || result.error) { - this.produceAppMetric({ - team_id: result.invocation.teamId, - app_source_id: result.invocation.hogFunction.id, - metric_kind: result.error ? 'failure' : 'success', - metric_name: result.error ? 'failed' : 'succeeded', - count: 1, - }) - } - - this.produceLogs(result) - - // PostHog capture events - const capturedEvents = result.capturedPostHogEvents - delete result.capturedPostHogEvents - - for (const event of capturedEvents ?? []) { - const team = await this.hub.teamManager.fetchTeam(event.team_id) - if (!team) { - continue - } - this.messagesToProduce.push({ - topic: KAFKA_EVENTS_PLUGIN_INGESTION, - value: convertToCaptureEvent(event, team), - key: `${team!.api_token}:${event.distinct_id}`, - }) - } - - if (!result.finished) { - // If it isn't finished then we need to put it back on the queue - await this.queueInvocation(result.invocation) - } - }) - ) - }, - }) - } - public async _handleKafkaBatch(messages: Message[]): Promise { const events = await this.runWithHeartbeat(() => runInstrumentedFunction({ diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index 7de2856530e14..c81708e13b275 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -189,6 +189,7 @@ export function getDefaultConfig(): PluginsServerConfig { CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS: '', CDP_ASYNC_FUNCTIONS_CYCLOTRON_TEAMS: '', CDP_REDIS_PASSWORD: '', + CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP: true, CDP_REDIS_HOST: '', CDP_REDIS_PORT: 6479, diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index 50829596613e8..c8f0c2cd82b9a 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -111,6 +111,7 @@ export type CdpConfig = { CDP_REDIS_HOST: string CDP_REDIS_PORT: number CDP_REDIS_PASSWORD: string + CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP: boolean } export interface PluginsServerConfig extends CdpConfig { diff --git a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts index 4bb3a36822008..713a478ac1b14 100644 --- a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts +++ b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts @@ -114,14 +114,13 @@ describe('CDP Processed Events Consumer', () => { }) describe('general event processing', () => { - /** - * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios - */ + beforeEach(() => { + hub.CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP = false + }) describe('common processing', () => { let fnFetchNoFilters: HogFunctionType let fnPrinterPageviewFilters: HogFunctionType - let globals: HogFunctionInvocationGlobals beforeEach(async () => { @@ -296,5 +295,62 @@ describe('CDP Processed Events Consumer', () => { ]) }) }) + + describe('no delayed execution', () => { + beforeEach(() => { + hub.CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP = true + }) + + it('should invoke the initial function before enqueuing', async () => { + await insertHogFunction({ + ...HOG_EXAMPLES.simple_fetch, + ...HOG_INPUTS_EXAMPLES.simple_fetch, + ...HOG_FILTERS_EXAMPLES.no_filters, + }) + // Create a message that should be processed by this function + // Run the function and check that it was executed + await processor._handleKafkaBatch([ + createMessage( + createIncomingEvent(team.id, { + uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', + event: '$pageview', + properties: JSON.stringify({ + $lib_version: '1.0.0', + }), + }) + ), + ]) + + // General check that the message seemed to get processed + expect(decodeAllKafkaMessages()).toMatchObject([ + { + key: expect.any(String), + topic: 'log_entries_test', + value: { + message: 'Executing function', + }, + waitForAck: true, + }, + { + key: expect.any(String), + topic: 'log_entries_test', + value: { + message: expect.stringContaining( + "Suspending function due to async function call 'fetch'. Payload" + ), + }, + waitForAck: true, + }, + { + key: expect.any(String), + topic: 'cdp_function_callbacks_test', + value: { + state: expect.any(String), + }, + waitForAck: true, + }, + ]) + }) + }) }) }) From 3c8efd94ebbceb52bbc4fc03bcd20cf4dbe390e4 Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 2 Sep 2024 12:39:05 +0200 Subject: [PATCH 16/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 764f7e0842e7e..576f703e9618d 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -399,7 +399,6 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { await this.processInvocationResults(invocationResults) } else { await this.queueInvocations(invocationsToBeQueued) - await this.produceQueuedMessages() } await this.produceQueuedMessages() From 8f6762f2fd3a5aef40622f281db72ac4dd5ec66b Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 2 Sep 2024 15:49:31 +0200 Subject: [PATCH 17/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 210 +++++++++++++++++-------- plugin-server/src/cdp/types.ts | 1 + plugin-server/src/cdp/utils.ts | 1 + plugin-server/src/config/config.ts | 2 +- plugin-server/src/types.ts | 2 +- rust/cyclotron-node/src/index.ts | 34 ++-- 6 files changed, 168 insertions(+), 82 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 576f703e9618d..18caefee8fd5c 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -3,6 +3,7 @@ import { captureException } from '@sentry/node' import { Message } from 'node-rdkafka' import { Counter, Histogram } from 'prom-client' +import { buildIntegerMatcher } from '../config/config' import { KAFKA_APP_METRICS_2, KAFKA_CDP_FUNCTION_CALLBACKS, @@ -14,7 +15,7 @@ import { BatchConsumer, startBatchConsumer } from '../kafka/batch-consumer' import { createRdConnectionConfigFromEnvVars } from '../kafka/config' import { addSentryBreadcrumbsEventListeners } from '../main/ingestion-queues/kafka-metrics' import { runInstrumentedFunction } from '../main/utils' -import { AppMetric2Type, Hub, RawClickHouseEvent, TeamId, TimestampFormat } from '../types' +import { AppMetric2Type, Hub, RawClickHouseEvent, TeamId, TimestampFormat, ValueMatcher } from '../types' import { createKafkaProducerWrapper } from '../utils/db/hub' import { KafkaProducerWrapper } from '../utils/db/kafka-producer-wrapper' import { captureTeamEvent } from '../utils/posthog' @@ -85,10 +86,10 @@ abstract class CdpConsumerBase { messagesToProduce: HogFunctionMessageToProduce[] = [] redis: CdpRedis + private cyclotronMatcher: ValueMatcher + protected kafkaProducer?: KafkaProducerWrapper protected abstract name: string - protected abstract topic: string - protected abstract consumerGroupId: string protected heartbeat = () => {} @@ -103,6 +104,11 @@ abstract class CdpConsumerBase { const rustyHook = this.hub?.rustyHook ?? new RustyHook(this.hub) this.fetchExecutor = new FetchExecutor(this.hub, rustyHook) this.groupsManager = new GroupsManager(this.hub) + this.cyclotronMatcher = buildIntegerMatcher(hub.CDP_CYCLOTRON_ENABLED_TEAMS, false) + } + + protected cyclotronEnabled(invocation: HogFunctionInvocation): boolean { + return !!(this.hub.CYCLOTRON_DATABASE_URL && this.cyclotronMatcher(invocation.globals.project.id)) } private async captureInternalPostHogEvent( @@ -215,6 +221,43 @@ abstract class CdpConsumerBase { delete (serializedInvocation as any).hogFunction + if (this.cyclotronEnabled(invocation)) { + // Cyclotron enabled + if (!invocation.vmState) { + // TODO: Figure out how to convert this effectively + // id: string + // globals: HogFunctionInvocationGlobals + // teamId: Team['id'] + // hogFunction: HogFunctionType + // priority: number + // queue: 'hog' | 'fetch' + // queueParameters?: HogFunctionInvocationQueueParameters + // // The current vmstate (set if the invocation is paused) + // vmState?: VMState + // timings: HogFunctionTiming[] + + await cyclotron.createJob({ + id: invocation.id, + teamId: invocation.globals.project.id, + functionId: invocation.hogFunction.id, + queueName: invocation.queue, + parameters: invocation.queueParameters ? JSON.stringify(invocation.queueParameters) : undefined, + priority: invocation.priority, + vmState: JSON.stringify(serializedInvocation), // TODO: This doesn't feel right but we need timings, globals and vmstate to all be somewhere :thinking: + }) + } else { + // Ideally we could just have an "upsertJob" method or something... + await cyclotron.updateJob(invocation.id, { + queue: invocation.queue, + parameters: invocation.queueParameters ? JSON.stringify(invocation.queueParameters) : undefined, + priority: invocation.priority, + vmState: JSON.stringify(serializedInvocation), + }) + } + + return + } + const request: HogFunctionInvocationSerializedCompressed = { state: await gzipObject(serializedInvocation), } @@ -275,11 +318,10 @@ abstract class CdpConsumerBase { }) } - protected async startKafkaConsumer() { + protected async startKafkaConsumer(options: { topic: string; groupId: string }): Promise { this.batchConsumer = await startBatchConsumer({ + ...options, connectionConfig: createRdConnectionConfigFromEnvVars(this.hub), - groupId: this.consumerGroupId, - topic: this.topic, autoCommit: true, sessionTimeout: this.hub.KAFKA_CONSUMPTION_SESSION_TIMEOUT_MS, maxPollIntervalMs: this.hub.KAFKA_CONSUMPTION_MAX_POLL_INTERVAL_MS, @@ -338,8 +380,6 @@ abstract class CdpConsumerBase { this.kafkaProducer = await createKafkaProducerWrapper(this.hub) this.kafkaProducer.producer.connect() - - await this.startKafkaConsumer() } public async stop(): Promise { @@ -370,8 +410,6 @@ abstract class CdpConsumerBase { export class CdpProcessedEventsConsumer extends CdpConsumerBase { protected name = 'CdpProcessedEventsConsumer' - protected topic = KAFKA_EVENTS_JSON - protected consumerGroupId = 'cdp-processed-events-consumer' public async processBatch(invocationGlobals: HogFunctionInvocationGlobals[]): Promise { if (!invocationGlobals.length) { @@ -442,8 +480,10 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { }) const states = await this.hogWatcher.getStates(possibleInvocations.map((x) => x.hogFunction.id)) + const validInvocations: HogFunctionInvocation[] = [] - const notDisabledInvocations = possibleInvocations.filter((item) => { + // Iterate over adding them to the list and updating their priority + possibleInvocations.forEach((item) => { const state = states[item.hogFunction.id].state if (state >= HogWatcherState.disabledForPeriod) { this.produceAppMetric({ @@ -456,15 +496,19 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { : 'disabled_permanently', count: 1, }) - return false + return } - return true + if (state === HogWatcherState.degraded) { + item.priority = 2 + } + + validInvocations.push(item) }) // Now we can filter by masking configs const { masked, notMasked: notMaskedInvocations } = await this.hogMasker.filterByMasking( - notDisabledInvocations + validInvocations ) masked.forEach((item) => { @@ -523,6 +567,14 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { await this.processBatch(invocationGlobals) } + + public async start(): Promise { + await super.start() + await this.startKafkaConsumer({ + topic: KAFKA_EVENTS_JSON, + groupId: 'cdp-processed-events-consumer', + }) + } } /** @@ -530,8 +582,6 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { */ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { protected name = 'CdpFunctionCallbackConsumer' - protected topic = KAFKA_CDP_FUNCTION_CALLBACKS - protected consumerGroupId = 'cdp-function-callback-consumer' public async processBatch(invocations: HogFunctionInvocation[]): Promise { if (!invocations.length) { @@ -634,52 +684,86 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { await this.processBatch(events) } + + public async start(): Promise { + await super.start() + await this.startKafkaConsumer({ + topic: KAFKA_CDP_FUNCTION_CALLBACKS, + groupId: 'cdp-function-callback-consumer', + }) + } } -// // TODO: Split out non-Kafka specific parts of CdpConsumerBase so that it can be used by the -// // Cyclotron worker below. Or maybe we can just wait, and rip the Kafka bits out once Cyclotron is -// // shipped (and rename it something other than consumer, probably). For now, this is an easy way to -// // use existing code and get an end-to-end demo shipped. -// export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { -// protected name = 'CdpCyclotronWorker' -// protected topic = 'UNUSED-CdpCyclotronWorker' -// protected consumerGroupId = 'UNUSED-CdpCyclotronWorker' -// private runningWorker: Promise | undefined -// private isUnhealthy = false - -// private async innerStart() { -// try { -// const limit = 100 // TODO: Make configurable. -// while (!this.isStopping) { -// const jobs = await cyclotron.dequeueJobsWithVmState('hog', limit) -// // TODO: Decode jobs into the right types - -// await this.processBatch(jobs) -// } -// } catch (err) { -// this.isUnhealthy = true -// console.error('Error in Cyclotron worker', err) -// throw err -// } -// } - -// public async start() { -// await cyclotron.initManager({ shards: [{ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }] }) -// await cyclotron.initWorker({ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }) - -// // Consumer `start` expects an async task is started, and not that `start` itself blocks -// // indefinitely. -// this.runningWorker = this.innerStart() - -// return Promise.resolve() -// } - -// public async stop() { -// await super.stop() -// await this.runningWorker -// } - -// public isHealthy() { -// return this.isUnhealthy -// } -// } +export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { + protected name = 'CdpCyclotronWorker' + + private runningWorker: Promise | undefined + private isUnhealthy = false + + private async innerStart() { + try { + const limit = 100 // TODO: Make configurable. + while (!this.isStopping) { + const jobs = await cyclotron.dequeueJobsWithVmState('hog', limit) + const invocations: HogFunctionInvocation[] = [] + + for (const job of jobs) { + // NOTE: This is all a bit messy and might be better to refactor into a helper + if (!job.functionId) { + throw new Error('Bad job: ' + JSON.stringify(job)) + } + const hogFunction = this.hogFunctionManager.getHogFunction(job.functionId) + + if (!hogFunction) { + status.error('Error finding hog function', { + id: job.functionId, + }) + return + } + + const parsedState = JSON.parse(job.metadata!) as HogFunctionInvocationSerialized + + // TODO: Should ID come from the job or the state? + invocations.push({ + id: job.id, + globals: parsedState.globals, + teamId: hogFunction.team_id, + hogFunction, + priority: job.priority, + queue: job.queueName ?? 'hog', + queueParameters: job.parameters ? JSON.parse(job.parameters) : undefined, + vmState: parsedState.vmState, + timings: [], + }) + } + + await this.processBatch(invocations) + } + } catch (err) { + this.isUnhealthy = true + console.error('Error in Cyclotron worker', err) + throw err + } + } + + public async start() { + await super.start() + // await cyclotron.initManager({ shards: [{ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }] }) + await cyclotron.initWorker({ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }) + + // Consumer `start` expects an async task is started, and not that `start` itself blocks + // indefinitely. + this.runningWorker = this.innerStart() + + return Promise.resolve() + } + + public async stop() { + await super.stop() + await this.runningWorker + } + + public isHealthy() { + return this.isUnhealthy + } +} diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index 19740db81306a..e8e673f79982a 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -181,6 +181,7 @@ export type HogFunctionInvocation = { globals: HogFunctionInvocationGlobals teamId: Team['id'] hogFunction: HogFunctionType + priority: number queue: 'hog' | 'fetch' queueParameters?: HogFunctionInvocationQueueParameters // The current vmstate (set if the invocation is paused) diff --git a/plugin-server/src/cdp/utils.ts b/plugin-server/src/cdp/utils.ts index da1d64273f7aa..934afc968e2fb 100644 --- a/plugin-server/src/cdp/utils.ts +++ b/plugin-server/src/cdp/utils.ts @@ -221,6 +221,7 @@ export function createInvocation( teamId: hogFunction.team_id, hogFunction, queue: 'hog', + priority: 1, timings: [], } } diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index c81708e13b275..d68a0f7f21d9e 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -187,7 +187,7 @@ export function getDefaultConfig(): PluginsServerConfig { CDP_WATCHER_REFILL_RATE: 10, CDP_WATCHER_DISABLED_TEMPORARY_MAX_COUNT: 3, CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS: '', - CDP_ASYNC_FUNCTIONS_CYCLOTRON_TEAMS: '', + CDP_CYCLOTRON_ENABLED_TEAMS: '', CDP_REDIS_PASSWORD: '', CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP: true, CDP_REDIS_HOST: '', diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index c8f0c2cd82b9a..6f9204d8d2907 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -107,7 +107,7 @@ export type CdpConfig = { CDP_WATCHER_DISABLED_TEMPORARY_TTL: number // How long a function should be temporarily disabled for CDP_WATCHER_DISABLED_TEMPORARY_MAX_COUNT: number // How many times a function can be disabled before it is disabled permanently CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS: string - CDP_ASYNC_FUNCTIONS_CYCLOTRON_TEAMS: string + CDP_CYCLOTRON_ENABLED_TEAMS: string CDP_REDIS_HOST: string CDP_REDIS_PORT: number CDP_REDIS_PASSWORD: string diff --git a/rust/cyclotron-node/src/index.ts b/rust/cyclotron-node/src/index.ts index fb8dd659d80c3..b4fee61c389dd 100644 --- a/rust/cyclotron-node/src/index.ts +++ b/rust/cyclotron-node/src/index.ts @@ -75,7 +75,7 @@ export interface Job { blob: Uint8Array | null } -export async function initWorker(poolConfig: PoolConfig): Promise { +async function initWorker(poolConfig: PoolConfig): Promise { const initWorkerInternal: InternalPoolConfig = { db_url: poolConfig.dbUrl, max_connections: poolConfig.maxConnections, @@ -87,7 +87,7 @@ export async function initWorker(poolConfig: PoolConfig): Promise { return await cyclotron.initWorker(JSON.stringify(initWorkerInternal)) } -export async function initManager(managerConfig: ManagerConfig): Promise { +async function initManager(managerConfig: ManagerConfig): Promise { const managerConfigInternal: InternalManagerConfig = { shards: managerConfig.shards.map((shard) => ({ db_url: shard.dbUrl, @@ -101,7 +101,7 @@ export async function initManager(managerConfig: ManagerConfig): Promise { return await cyclotron.initManager(JSON.stringify(managerConfigInternal)) } -export async function maybeInitWorker(poolConfig: PoolConfig): Promise { +async function maybeInitWorker(poolConfig: PoolConfig): Promise { const initWorkerInternal: InternalPoolConfig = { db_url: poolConfig.dbUrl, max_connections: poolConfig.maxConnections, @@ -113,7 +113,7 @@ export async function maybeInitWorker(poolConfig: PoolConfig): Promise { return await cyclotron.maybeInitWorker(JSON.stringify(initWorkerInternal)) } -export async function maybeInitManager(managerConfig: ManagerConfig): Promise { +async function maybeInitManager(managerConfig: ManagerConfig): Promise { const managerConfigInternal: InternalManagerConfig = { shards: managerConfig.shards.map((shard) => ({ db_url: shard.dbUrl, @@ -127,7 +127,7 @@ export async function maybeInitManager(managerConfig: ManagerConfig): Promise { +async function createJob(job: JobInit): Promise { job.priority ??= 1 job.scheduled ??= new Date() @@ -146,35 +146,35 @@ export async function createJob(job: JobInit): Promise { return await cyclotron.createJob(json, job.blob ? job.blob.buffer : undefined) } -export async function dequeueJobs(queueName: string, limit: number): Promise { +async function dequeueJobs(queueName: string, limit: number): Promise { return await cyclotron.dequeueJobs(queueName, limit) } -export async function dequeueJobsWithVmState(queueName: string, limit: number): Promise { +async function dequeueJobsWithVmState(queueName: string, limit: number): Promise { return await cyclotron.dequeueJobsWithVmState(queueName, limit) } -export async function flushJob(jobId: string): Promise { +async function flushJob(jobId: string): Promise { return await cyclotron.flushJob(jobId) } -export function setState(jobId: string, jobState: JobState): Promise { +function setState(jobId: string, jobState: JobState): Promise { return cyclotron.setState(jobId, jobState) } -export function setQueue(jobId: string, queueName: string): Promise { +function setQueue(jobId: string, queueName: string): Promise { return cyclotron.setQueue(jobId, queueName) } -export function setPriority(jobId: string, priority: number): Promise { +function setPriority(jobId: string, priority: number): Promise { return cyclotron.setPriority(jobId, priority) } -export function setScheduledAt(jobId: string, scheduledAt: Date): Promise { +function setScheduledAt(jobId: string, scheduledAt: Date): Promise { return cyclotron.setScheduledAt(jobId, scheduledAt.toISOString()) } -export function serializeObject(name: string, obj: Record | null): string | null { +function serializeObject(name: string, obj: Record | null): string | null { if (obj === null) { return null } else if (typeof obj === 'object' && obj !== null) { @@ -183,22 +183,22 @@ export function serializeObject(name: string, obj: Record | null): throw new Error(`${name} must be either an object or null`) } -export function setVmState(jobId: string, vmState: Record | null): Promise { +function setVmState(jobId: string, vmState: Record | null): Promise { const serialized = serializeObject('vmState', vmState) return cyclotron.setVmState(jobId, serialized) } -export function setMetadata(jobId: string, metadata: Record | null): Promise { +function setMetadata(jobId: string, metadata: Record | null): Promise { const serialized = serializeObject('metadata', metadata) return cyclotron.setMetadata(jobId, serialized) } -export function setParameters(jobId: string, parameters: Record | null): Promise { +function setParameters(jobId: string, parameters: Record | null): Promise { const serialized = serializeObject('parameters', parameters) return cyclotron.setParameters(jobId, serialized) } -export function setBlob(jobId: string, blob: Uint8Array | null): Promise { +function setBlob(jobId: string, blob: Uint8Array | null): Promise { return cyclotron.setBlob(jobId, blob) } From 68b8e493430b4b26e962de9b9ed88404cb2270a9 Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 2 Sep 2024 15:53:03 +0200 Subject: [PATCH 18/75] Fix --- plugin-server/src/cdp/cdp-consumers.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 18caefee8fd5c..a396c59f249dc 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -705,6 +705,7 @@ export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { const limit = 100 // TODO: Make configurable. while (!this.isStopping) { const jobs = await cyclotron.dequeueJobsWithVmState('hog', limit) + // TODO: How do we "hold" these dequeued jobs? const invocations: HogFunctionInvocation[] = [] for (const job of jobs) { From b2ecbf46985b0955b4f310494c1a7ba5b2708712 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 09:21:32 +0200 Subject: [PATCH 19/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 9 +++++++-- rust/cyclotron-node/src/index.ts | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index a396c59f249dc..7de1607db3ae5 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -241,7 +241,10 @@ abstract class CdpConsumerBase { teamId: invocation.globals.project.id, functionId: invocation.hogFunction.id, queueName: invocation.queue, - parameters: invocation.queueParameters ? JSON.stringify(invocation.queueParameters) : undefined, + queueParameters: invocation.queueParameters + ? JSON.stringify(invocation.queueParameters) + : undefined, + queueBlob: invocation.queueBlob, priority: invocation.priority, vmState: JSON.stringify(serializedInvocation), // TODO: This doesn't feel right but we need timings, globals and vmstate to all be somewhere :thinking: }) @@ -704,6 +707,9 @@ export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { try { const limit = 100 // TODO: Make configurable. while (!this.isStopping) { + // TODO: Add a timeout check + await cyclotron.dequeueJobsWithVmState('hog', limit, (jobs) => {}) + const jobs = await cyclotron.dequeueJobsWithVmState('hog', limit) // TODO: How do we "hold" these dequeued jobs? const invocations: HogFunctionInvocation[] = [] @@ -749,7 +755,6 @@ export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { public async start() { await super.start() - // await cyclotron.initManager({ shards: [{ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }] }) await cyclotron.initWorker({ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }) // Consumer `start` expects an async task is started, and not that `start` itself blocks diff --git a/rust/cyclotron-node/src/index.ts b/rust/cyclotron-node/src/index.ts index b4fee61c389dd..155fcffae09fa 100644 --- a/rust/cyclotron-node/src/index.ts +++ b/rust/cyclotron-node/src/index.ts @@ -158,6 +158,7 @@ async function flushJob(jobId: string): Promise { return await cyclotron.flushJob(jobId) } +// TODO: Remove promise type returns function setState(jobId: string, jobState: JobState): Promise { return cyclotron.setState(jobId, jobState) } From b95a3b5ccff9cb8d8b0781609773a498908f5094 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 10:23:47 +0200 Subject: [PATCH 20/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 31 +-- plugin-server/tests/cdp/cdp-e2e.test.ts | 197 ++++++++++++++++++ ...test.ts => cdp-function-processor.test.ts} | 7 +- rust/cyclotron-node/src/index.ts | 135 ++++++------ 4 files changed, 277 insertions(+), 93 deletions(-) create mode 100644 plugin-server/tests/cdp/cdp-e2e.test.ts rename plugin-server/tests/cdp/{cdp-consumer.e2e.test.ts => cdp-function-processor.test.ts} (97%) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 7de1607db3ae5..4fba5072dee8c 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -210,7 +210,7 @@ abstract class CdpConsumerBase { } protected async queueInvocation(invocation: HogFunctionInvocation) { - // TODO: Add cylcotron check here and enqueue that way + // TODO: Add cyclotron check here and enqueue that way // For now we just enqueue to kafka // For kafka style this is overkill to enqueue this way but it simplifies migrating to the new system @@ -236,26 +236,30 @@ abstract class CdpConsumerBase { // vmState?: VMState // timings: HogFunctionTiming[] - await cyclotron.createJob({ - id: invocation.id, + const job = await cyclotron.createJob({ teamId: invocation.globals.project.id, functionId: invocation.hogFunction.id, queueName: invocation.queue, - queueParameters: invocation.queueParameters - ? JSON.stringify(invocation.queueParameters) - : undefined, - queueBlob: invocation.queueBlob, + parameters: invocation.queueParameters ? JSON.stringify(invocation.queueParameters) : undefined, + // queueBlob: invocation.blob, priority: invocation.priority, vmState: JSON.stringify(serializedInvocation), // TODO: This doesn't feel right but we need timings, globals and vmstate to all be somewhere :thinking: }) + + console.log('Created job', job) } else { // Ideally we could just have an "upsertJob" method or something... - await cyclotron.updateJob(invocation.id, { - queue: invocation.queue, - parameters: invocation.queueParameters ? JSON.stringify(invocation.queueParameters) : undefined, - priority: invocation.priority, - vmState: JSON.stringify(serializedInvocation), - }) + cyclotron.setQueue(invocation.id, invocation.queue) + cyclotron.setVmState(invocation.id, serializedInvocation) + cyclotron.setPriority(invocation.id, invocation.priority) + cyclotron.setParameters(invocation.id, invocation.queueParameters ?? null) + + // await cyclotron.updateJob(invocation.id, { + // queue: invocation.queue, + // parameters: invocation.queueParameters ? JSON.stringify(invocation.queueParameters) : undefined, + // priority: invocation.priority, + // vmState: JSON.stringify(serializedInvocation), + // }) } return @@ -653,6 +657,7 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { invocationSerialized.hogFunctionId ) if (!hogFunction) { + console.log('HERE!!!!', invocationSerialized) status.error('Error finding hog function', { id: invocationSerialized.hogFunctionId, }) diff --git a/plugin-server/tests/cdp/cdp-e2e.test.ts b/plugin-server/tests/cdp/cdp-e2e.test.ts new file mode 100644 index 0000000000000..da912f62f949f --- /dev/null +++ b/plugin-server/tests/cdp/cdp-e2e.test.ts @@ -0,0 +1,197 @@ +import { KafkaConsumer, LibrdKafkaError, Message } from 'node-rdkafka' + +import { CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../../src/cdp/cdp-consumers' +import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' +import { KAFKA_APP_METRICS_2 } from '../../src/config/kafka-topics' +import { BatchConsumer, startBatchConsumer } from '../../src/kafka/batch-consumer' +import { createRdConnectionConfigFromEnvVars } from '../../src/kafka/config' +import { createKafkaConsumer } from '../../src/kafka/consumer' +import { Hub, Team } from '../../src/types' +import { createHub } from '../../src/utils/db/hub' +import { delay } from '../../src/utils/utils' +import { getFirstTeam, resetTestDatabase } from '../helpers/sql' +import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' +import { createHogExecutionGlobals, insertHogFunction as _insertHogFunction } from './fixtures' + +jest.mock('../../src/utils/fetch', () => { + return { + trackedFetch: jest.fn(() => + Promise.resolve({ + status: 200, + text: () => Promise.resolve(JSON.stringify({ success: true })), + json: () => Promise.resolve({ success: true }), + }) + ), + } +}) + +const mockFetch: jest.Mock = require('../../src/utils/fetch').trackedFetch + +jest.setTimeout(1000) + +type KafkaObserver = { + messages: Message[] + consumer: KafkaConsumer + stop: () => Promise +} +const createKafkaObserver = async (hub: Hub): Promise => { + const consumer = await createKafkaConsumer({ + ...createRdConnectionConfigFromEnvVars(hub), + 'group.id': 'test-group', + }) + + consumer.connect() + consumer.subscribe([KAFKA_APP_METRICS_2]) + const messages: Message[] = [] + + const poll = async () => { + await delay(50) + if (!consumer.isConnected()) { + return + } + const newMessages = await new Promise((res, rej) => + consumer.consume(1000, (err, messages) => (err ? rej(err) : res(messages))) + ) + messages.push(...newMessages) + poll() + } + + poll() + + return { + messages, + consumer, + stop: () => new Promise((res) => consumer.disconnect(res)), + } +} + +describe('CDP E2E', () => { + let processedEventsConsumer: CdpProcessedEventsConsumer + let functionProcessor: CdpFunctionCallbackConsumer + let hub: Hub + let closeHub: () => Promise + let team: Team + let kafkaObserver: KafkaObserver + + const insertHogFunction = async (hogFunction: Partial) => { + const item = await _insertHogFunction(hub.postgres, team.id, hogFunction) + // Trigger the reload that django would do + await processedEventsConsumer.hogFunctionManager.reloadAllHogFunctions() + await functionProcessor.hogFunctionManager.reloadAllHogFunctions() + return item + } + + beforeEach(async () => { + await resetTestDatabase() + ;[hub, closeHub] = await createHub() + team = await getFirstTeam(hub) + + kafkaObserver = await createKafkaObserver(hub) + + processedEventsConsumer = new CdpProcessedEventsConsumer(hub) + await processedEventsConsumer.start() + functionProcessor = new CdpFunctionCallbackConsumer(hub) + await functionProcessor.start() + + mockFetch.mockClear() + }) + + afterEach(async () => { + jest.setTimeout(10000) + await processedEventsConsumer.stop() + await functionProcessor.stop() + await kafkaObserver.stop() + await closeHub() + }) + + afterAll(() => { + jest.useRealTimers() + }) + + describe('full fetch function', () => { + /** + * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios + */ + + let fnFetchNoFilters: HogFunctionType + let globals: HogFunctionInvocationGlobals + + beforeEach(async () => { + fnFetchNoFilters = await insertHogFunction({ + ...HOG_EXAMPLES.simple_fetch, + ...HOG_INPUTS_EXAMPLES.simple_fetch, + ...HOG_FILTERS_EXAMPLES.no_filters, + }) + + globals = createHogExecutionGlobals({ + project: { + id: team.id, + } as any, + event: { + uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', + name: '$pageview', + properties: { + $current_url: 'https://posthog.com', + $lib_version: '1.0.0', + }, + } as any, + }) + }) + + // const gatherProducedMessages = () => { + // const allMessages = decodeAllKafkaMessages() + + // allMessages.forEach((message) => { + // if (message.topic === 'clickhouse_app_metrics2_test') { + // kafkaMessages.metrics.push(message) + // } else if (message.topic === 'log_entries_test') { + // kafkaMessages.logs.push(message) + // } else if (message.topic === 'cdp_function_callbacks_test') { + // kafkaMessages.invocations.push(message) + // } else { + // throw new Error(`Unknown topic: ${message.topic}`) + // } + // }) + + // mockProducer.produce.mockClear() + // } + + it('should invoke a function via kafka transportation until completed', async () => { + // NOTE: We can skip kafka as the entry point + const invocations = await processedEventsConsumer.processBatch([globals]) + expect(invocations).toHaveLength(1) + // gatherProducedMessages() + + // expect(kafkaMessages.invocations).toHaveLength(1) + // expect(kafkaMessages.invocations[0].topic).toEqual('cdp_function_callbacks_test') + // // mockProducer.produce.mockClear() + + // while (kafkaMessages.invocations.length) { + // await functionProcessor._handleKafkaBatch([convertToKafkaMessage(kafkaMessages.invocations[0])]) + // kafkaMessages.invocations = [] + // gatherProducedMessages() + // } + + // expect(kafkaMessages.metrics).toMatchObject([ + // { + // key: fnFetchNoFilters.id.toString(), + // value: { + // app_source: 'hog_function', + // app_source_id: fnFetchNoFilters.id.toString(), + // count: 1, + // metric_kind: 'success', + // metric_name: 'succeeded', + // team_id: 2, + // }, + // }, + // ]) + // expect(kafkaMessages.logs.map((x) => x.value.message)).toEqual([ + // 'Executing function', + // "Suspending function due to async function call 'fetch'. Payload: 1902 bytes", + // 'Resuming function', + // 'Fetch response:, {"status":200,"body":{"success":true}}', + // expect.stringContaining('Function completed'), + // ]) + }) + }) +}) diff --git a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts b/plugin-server/tests/cdp/cdp-function-processor.test.ts similarity index 97% rename from plugin-server/tests/cdp/cdp-consumer.e2e.test.ts rename to plugin-server/tests/cdp/cdp-function-processor.test.ts index 98cb2e041b042..5e343db0f8fc4 100644 --- a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-function-processor.test.ts @@ -80,10 +80,7 @@ const convertToKafkaMessage = (message: any): any => { } } -/** - * NOTE: This isn't fully e2e... We still mock kafka but we trigger one queue from the other in a loop - */ -describe('CDP Consumers E2E', () => { +describe('CDP Function Processor', () => { let processedEventsConsumer: CdpProcessedEventsConsumer let functionProcessor: CdpFunctionCallbackConsumer let hub: Hub @@ -122,7 +119,7 @@ describe('CDP Consumers E2E', () => { jest.useRealTimers() }) - describe('e2e fetch function', () => { + describe('full fetch function', () => { /** * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios */ diff --git a/rust/cyclotron-node/src/index.ts b/rust/cyclotron-node/src/index.ts index 155fcffae09fa..d14a14868cbab 100644 --- a/rust/cyclotron-node/src/index.ts +++ b/rust/cyclotron-node/src/index.ts @@ -1,7 +1,7 @@ // eslint-disable-next-line @typescript-eslint/no-var-requires const cyclotron = require('../index.node') -export interface PoolConfig { +export type PoolConfig = { dbUrl: string maxConnections?: number minConnections?: number @@ -11,7 +11,7 @@ export interface PoolConfig { } // Type as expected by Cyclotron. -interface InternalPoolConfig { +type InternalPoolConfig = { db_url: string max_connections?: number min_connections?: number @@ -20,29 +20,17 @@ interface InternalPoolConfig { idle_timeout_seconds?: number } -export interface ManagerConfig { +export type ManagerConfig = { shards: PoolConfig[] } // Type as expected by Cyclotron. -interface InternalManagerConfig { +type InternalManagerConfig = { shards: InternalPoolConfig[] } -export interface JobInit { - teamId: number - functionId: string - queueName: string - priority?: number - scheduled?: Date - vmState?: string - parameters?: string - blob?: Uint8Array - metadata?: string -} - // Type as expected by Cyclotron. -interface InternalJobInit { +type InternalJobInit = { team_id: number function_id: string queue_name: string @@ -55,7 +43,7 @@ interface InternalJobInit { export type JobState = 'available' | 'running' | 'completed' | 'failed' | 'paused' -export interface Job { +export type Job = { id: string teamId: number functionId: string | null @@ -75,8 +63,31 @@ export interface Job { blob: Uint8Array | null } -async function initWorker(poolConfig: PoolConfig): Promise { - const initWorkerInternal: InternalPoolConfig = { +export type JobInit = { + teamId: number + functionId: string + queueName: string + priority?: number + scheduled?: Date + vmState?: string + parameters?: string + blob?: Uint8Array + metadata?: string +} + +// helpers + +function serializeObject(name: string, obj: Record | null): string | null { + if (obj === null) { + return null + } else if (typeof obj === 'object' && obj !== null) { + return JSON.stringify(obj) + } + throw new Error(`${name} must be either an object or null`) +} + +function convertToInternalPoolConfig(poolConfig: PoolConfig): InternalPoolConfig { + return { db_url: poolConfig.dbUrl, max_connections: poolConfig.maxConnections, min_connections: poolConfig.minConnections, @@ -84,49 +95,44 @@ async function initWorker(poolConfig: PoolConfig): Promise { max_lifetime_seconds: poolConfig.maxLifetimeSeconds, idle_timeout_seconds: poolConfig.idleTimeoutSeconds, } - return await cyclotron.initWorker(JSON.stringify(initWorkerInternal)) +} + +// Management API +async function initWorker(poolConfig: PoolConfig): Promise { + return await cyclotron.initWorker(JSON.stringify(convertToInternalPoolConfig(poolConfig))) } async function initManager(managerConfig: ManagerConfig): Promise { const managerConfigInternal: InternalManagerConfig = { - shards: managerConfig.shards.map((shard) => ({ - db_url: shard.dbUrl, - max_connections: shard.maxConnections, - min_connections: shard.minConnections, - acquire_timeout_seconds: shard.acquireTimeoutSeconds, - max_lifetime_seconds: shard.maxLifetimeSeconds, - idle_timeout_seconds: shard.idleTimeoutSeconds, - })), + shards: managerConfig.shards.map((shard) => convertToInternalPoolConfig(shard)), } return await cyclotron.initManager(JSON.stringify(managerConfigInternal)) } async function maybeInitWorker(poolConfig: PoolConfig): Promise { - const initWorkerInternal: InternalPoolConfig = { - db_url: poolConfig.dbUrl, - max_connections: poolConfig.maxConnections, - min_connections: poolConfig.minConnections, - acquire_timeout_seconds: poolConfig.acquireTimeoutSeconds, - max_lifetime_seconds: poolConfig.maxLifetimeSeconds, - idle_timeout_seconds: poolConfig.idleTimeoutSeconds, - } - return await cyclotron.maybeInitWorker(JSON.stringify(initWorkerInternal)) + return await cyclotron.maybeInitWorker(JSON.stringify(convertToInternalPoolConfig(poolConfig))) } async function maybeInitManager(managerConfig: ManagerConfig): Promise { const managerConfigInternal: InternalManagerConfig = { - shards: managerConfig.shards.map((shard) => ({ - db_url: shard.dbUrl, - max_connections: shard.maxConnections, - min_connections: shard.minConnections, - acquire_timeout_seconds: shard.acquireTimeoutSeconds, - max_lifetime_seconds: shard.maxLifetimeSeconds, - idle_timeout_seconds: shard.idleTimeoutSeconds, - })), + shards: managerConfig.shards.map((shard) => convertToInternalPoolConfig(shard)), } return await cyclotron.maybeInitManager(JSON.stringify(managerConfigInternal)) } +async function dequeueJobs(queueName: string, limit: number): Promise { + return await cyclotron.dequeueJobs(queueName, limit) +} + +async function dequeueJobsWithVmState(queueName: string, limit: number): Promise { + return await cyclotron.dequeueJobsWithVmState(queueName, limit) +} + +async function flushJob(jobId: string): Promise { + return await cyclotron.flushJob(jobId) +} + +// Job API async function createJob(job: JobInit): Promise { job.priority ??= 1 job.scheduled ??= new Date() @@ -146,60 +152,39 @@ async function createJob(job: JobInit): Promise { return await cyclotron.createJob(json, job.blob ? job.blob.buffer : undefined) } -async function dequeueJobs(queueName: string, limit: number): Promise { - return await cyclotron.dequeueJobs(queueName, limit) -} - -async function dequeueJobsWithVmState(queueName: string, limit: number): Promise { - return await cyclotron.dequeueJobsWithVmState(queueName, limit) -} - -async function flushJob(jobId: string): Promise { - return await cyclotron.flushJob(jobId) -} - // TODO: Remove promise type returns -function setState(jobId: string, jobState: JobState): Promise { +function setState(jobId: string, jobState: JobState): void { return cyclotron.setState(jobId, jobState) } -function setQueue(jobId: string, queueName: string): Promise { +function setQueue(jobId: string, queueName: string): void { return cyclotron.setQueue(jobId, queueName) } -function setPriority(jobId: string, priority: number): Promise { +function setPriority(jobId: string, priority: number): void { return cyclotron.setPriority(jobId, priority) } -function setScheduledAt(jobId: string, scheduledAt: Date): Promise { +function setScheduledAt(jobId: string, scheduledAt: Date): void { return cyclotron.setScheduledAt(jobId, scheduledAt.toISOString()) } -function serializeObject(name: string, obj: Record | null): string | null { - if (obj === null) { - return null - } else if (typeof obj === 'object' && obj !== null) { - return JSON.stringify(obj) - } - throw new Error(`${name} must be either an object or null`) -} - -function setVmState(jobId: string, vmState: Record | null): Promise { +function setVmState(jobId: string, vmState: Record | null): void { const serialized = serializeObject('vmState', vmState) return cyclotron.setVmState(jobId, serialized) } -function setMetadata(jobId: string, metadata: Record | null): Promise { +function setMetadata(jobId: string, metadata: Record | null): void { const serialized = serializeObject('metadata', metadata) return cyclotron.setMetadata(jobId, serialized) } -function setParameters(jobId: string, parameters: Record | null): Promise { +function setParameters(jobId: string, parameters: Record | null): void { const serialized = serializeObject('parameters', parameters) return cyclotron.setParameters(jobId, serialized) } -function setBlob(jobId: string, blob: Uint8Array | null): Promise { +function setBlob(jobId: string, blob: Uint8Array | null): void { return cyclotron.setBlob(jobId, blob) } From 71ec5330ba31a6493099dd734be368142d376cf9 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 11:01:24 +0200 Subject: [PATCH 21/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 3 + plugin-server/src/utils/status.ts | 7 +- plugin-server/tests/cdp/cdp-e2e.test.ts | 207 +++++++++--------- .../tests/cdp/helpers/kafka-observer.ts | 67 ++++++ plugin-server/tests/helpers/expectations.ts | 17 ++ 5 files changed, 197 insertions(+), 104 deletions(-) create mode 100644 plugin-server/tests/cdp/helpers/kafka-observer.ts create mode 100644 plugin-server/tests/helpers/expectations.ts diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 4fba5072dee8c..215dda1683db4 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -369,6 +369,9 @@ abstract class CdpConsumerBase { addSentryBreadcrumbsEventListeners(this.batchConsumer.consumer) this.batchConsumer.consumer.on('disconnected', async (err) => { + if (!this.isStopping) { + return + } // since we can't be guaranteed that the consumer will be stopped before some other code calls disconnect // we need to listen to disconnect and make sure we're stopped status.info('🔁', `${this.name} batch consumer disconnected, cleaning up`, { err }) diff --git a/plugin-server/src/utils/status.ts b/plugin-server/src/utils/status.ts index 385b97739685e..d4bb164bb25e5 100644 --- a/plugin-server/src/utils/status.ts +++ b/plugin-server/src/utils/status.ts @@ -15,7 +15,7 @@ export interface StatusBlueprint { export class Status implements StatusBlueprint { mode?: string - logger: pino.Logger + private logger?: pino.Logger prompt: string transport: any @@ -59,11 +59,16 @@ export class Status implements StatusBlueprint { close() { this.transport?.end() + this.logger = undefined } buildMethod(type: keyof StatusBlueprint): StatusMethod { return (icon: string, message: string, extra: object) => { const logMessage = `[${this.prompt}] ${icon} ${message}` + + if (!this.logger) { + throw new Error(`Logger has been closed! Cannot log: ${logMessage}`) + } if (extra instanceof Object) { this.logger[type]({ ...extra, msg: logMessage }) } else { diff --git a/plugin-server/tests/cdp/cdp-e2e.test.ts b/plugin-server/tests/cdp/cdp-e2e.test.ts index da912f62f949f..da90bd9a58e35 100644 --- a/plugin-server/tests/cdp/cdp-e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-e2e.test.ts @@ -1,17 +1,13 @@ -import { KafkaConsumer, LibrdKafkaError, Message } from 'node-rdkafka' - import { CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../../src/cdp/cdp-consumers' import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' -import { KAFKA_APP_METRICS_2 } from '../../src/config/kafka-topics' -import { BatchConsumer, startBatchConsumer } from '../../src/kafka/batch-consumer' -import { createRdConnectionConfigFromEnvVars } from '../../src/kafka/config' -import { createKafkaConsumer } from '../../src/kafka/consumer' +import { KAFKA_APP_METRICS_2, KAFKA_LOG_ENTRIES } from '../../src/config/kafka-topics' import { Hub, Team } from '../../src/types' import { createHub } from '../../src/utils/db/hub' -import { delay } from '../../src/utils/utils' +import { waitForExpect } from '../helpers/expectations' import { getFirstTeam, resetTestDatabase } from '../helpers/sql' import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' import { createHogExecutionGlobals, insertHogFunction as _insertHogFunction } from './fixtures' +import { createKafkaObserver, TestKafkaObserver } from './helpers/kafka-observer' jest.mock('../../src/utils/fetch', () => { return { @@ -27,51 +23,13 @@ jest.mock('../../src/utils/fetch', () => { const mockFetch: jest.Mock = require('../../src/utils/fetch').trackedFetch -jest.setTimeout(1000) - -type KafkaObserver = { - messages: Message[] - consumer: KafkaConsumer - stop: () => Promise -} -const createKafkaObserver = async (hub: Hub): Promise => { - const consumer = await createKafkaConsumer({ - ...createRdConnectionConfigFromEnvVars(hub), - 'group.id': 'test-group', - }) - - consumer.connect() - consumer.subscribe([KAFKA_APP_METRICS_2]) - const messages: Message[] = [] - - const poll = async () => { - await delay(50) - if (!consumer.isConnected()) { - return - } - const newMessages = await new Promise((res, rej) => - consumer.consume(1000, (err, messages) => (err ? rej(err) : res(messages))) - ) - messages.push(...newMessages) - poll() - } - - poll() - - return { - messages, - consumer, - stop: () => new Promise((res) => consumer.disconnect(res)), - } -} - describe('CDP E2E', () => { let processedEventsConsumer: CdpProcessedEventsConsumer let functionProcessor: CdpFunctionCallbackConsumer let hub: Hub let closeHub: () => Promise let team: Team - let kafkaObserver: KafkaObserver + let kafkaObserver: TestKafkaObserver const insertHogFunction = async (hogFunction: Partial) => { const item = await _insertHogFunction(hub.postgres, team.id, hogFunction) @@ -86,7 +44,7 @@ describe('CDP E2E', () => { ;[hub, closeHub] = await createHub() team = await getFirstTeam(hub) - kafkaObserver = await createKafkaObserver(hub) + kafkaObserver = await createKafkaObserver(hub, [KAFKA_APP_METRICS_2, KAFKA_LOG_ENTRIES]) processedEventsConsumer = new CdpProcessedEventsConsumer(hub) await processedEventsConsumer.start() @@ -97,18 +55,19 @@ describe('CDP E2E', () => { }) afterEach(async () => { - jest.setTimeout(10000) - await processedEventsConsumer.stop() - await functionProcessor.stop() - await kafkaObserver.stop() - await closeHub() + try { + await Promise.all([processedEventsConsumer.stop(), functionProcessor.stop(), kafkaObserver.stop()]) + await closeHub() + } catch (e) { + console.error('Error in afterEach:', e) + } }) afterAll(() => { jest.useRealTimers() }) - describe('full fetch function', () => { + describe.each(['kafka', 'cyclotron'])('e2e fetch call: %s', (mode) => { /** * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios */ @@ -134,64 +93,106 @@ describe('CDP E2E', () => { $current_url: 'https://posthog.com', $lib_version: '1.0.0', }, + timestamp: '2024-09-03T09:00:00Z', } as any, }) - }) - - // const gatherProducedMessages = () => { - // const allMessages = decodeAllKafkaMessages() - // allMessages.forEach((message) => { - // if (message.topic === 'clickhouse_app_metrics2_test') { - // kafkaMessages.metrics.push(message) - // } else if (message.topic === 'log_entries_test') { - // kafkaMessages.logs.push(message) - // } else if (message.topic === 'cdp_function_callbacks_test') { - // kafkaMessages.invocations.push(message) - // } else { - // throw new Error(`Unknown topic: ${message.topic}`) - // } - // }) - - // mockProducer.produce.mockClear() - // } + if (mode === 'cyclotron') { + hub.CDP_CYCLOTRON_ENABLED_TEAMS = '*' + hub.CYCLOTRON_DATABASE_URL = 'postgres://localhost:5432/test_cyclotron' + } + }) it('should invoke a function via kafka transportation until completed', async () => { // NOTE: We can skip kafka as the entry point const invocations = await processedEventsConsumer.processBatch([globals]) expect(invocations).toHaveLength(1) - // gatherProducedMessages() - - // expect(kafkaMessages.invocations).toHaveLength(1) - // expect(kafkaMessages.invocations[0].topic).toEqual('cdp_function_callbacks_test') - // // mockProducer.produce.mockClear() - - // while (kafkaMessages.invocations.length) { - // await functionProcessor._handleKafkaBatch([convertToKafkaMessage(kafkaMessages.invocations[0])]) - // kafkaMessages.invocations = [] - // gatherProducedMessages() - // } - - // expect(kafkaMessages.metrics).toMatchObject([ - // { - // key: fnFetchNoFilters.id.toString(), - // value: { - // app_source: 'hog_function', - // app_source_id: fnFetchNoFilters.id.toString(), - // count: 1, - // metric_kind: 'success', - // metric_name: 'succeeded', - // team_id: 2, - // }, - // }, - // ]) - // expect(kafkaMessages.logs.map((x) => x.value.message)).toEqual([ - // 'Executing function', - // "Suspending function due to async function call 'fetch'. Payload: 1902 bytes", - // 'Resuming function', - // 'Fetch response:, {"status":200,"body":{"success":true}}', - // expect.stringContaining('Function completed'), - // ]) + + await waitForExpect(() => { + expect(kafkaObserver.messages).toHaveLength(6) + }) + + expect(mockFetch).toHaveBeenCalledTimes(1) + + expect(mockFetch.mock.calls[0]).toMatchInlineSnapshot(` + Array [ + "https://example.com/posthog-webhook", + Object { + "body": "{\\"event\\":{\\"uuid\\":\\"b3a1fe86-b10c-43cc-acaf-d208977608d0\\",\\"name\\":\\"$pageview\\",\\"distinct_id\\":\\"distinct_id\\",\\"url\\":\\"http://localhost:8000/events/1\\",\\"properties\\":{\\"$current_url\\":\\"https://posthog.com\\",\\"$lib_version\\":\\"1.0.0\\"},\\"timestamp\\":\\"2024-09-03T09:00:00Z\\"},\\"groups\\":{},\\"nested\\":{\\"foo\\":\\"http://localhost:8000/events/1\\"},\\"person\\":{\\"uuid\\":\\"uuid\\",\\"name\\":\\"test\\",\\"url\\":\\"http://localhost:8000/persons/1\\",\\"properties\\":{\\"email\\":\\"test@posthog.com\\"}},\\"event_url\\":\\"http://localhost:8000/events/1-test\\"}", + "headers": Object { + "version": "v=1.0.0", + }, + "method": "POST", + "timeout": 10000, + }, + ] + `) + + expect(kafkaObserver.messages).toMatchObject([ + { + topic: 'log_entries_test', + value: { + level: 'debug', + log_source: 'hog_function', + log_source_id: fnFetchNoFilters.id.toString(), + message: 'Executing function', + team_id: 2, + }, + }, + { + topic: 'log_entries_test', + value: { + level: 'debug', + log_source: 'hog_function', + log_source_id: fnFetchNoFilters.id.toString(), + message: expect.stringContaining( + "Suspending function due to async function call 'fetch'. Payload:" + ), + team_id: 2, + }, + }, + { + topic: 'clickhouse_app_metrics2_test', + value: { + app_source: 'hog_function', + app_source_id: fnFetchNoFilters.id.toString(), + count: 1, + metric_kind: 'success', + metric_name: 'succeeded', + team_id: 2, + }, + }, + { + topic: 'log_entries_test', + value: { + level: 'debug', + log_source: 'hog_function', + log_source_id: fnFetchNoFilters.id.toString(), + message: 'Resuming function', + team_id: 2, + }, + }, + { + topic: 'log_entries_test', + value: { + level: 'info', + log_source: 'hog_function', + log_source_id: fnFetchNoFilters.id.toString(), + message: `Fetch response:, {"status":200,"body":{"success":true}}`, + team_id: 2, + }, + }, + { + topic: 'log_entries_test', + value: { + level: 'debug', + log_source: 'hog_function', + log_source_id: fnFetchNoFilters.id.toString(), + message: expect.stringContaining('Function completed in'), + team_id: 2, + }, + }, + ]) }) }) }) diff --git a/plugin-server/tests/cdp/helpers/kafka-observer.ts b/plugin-server/tests/cdp/helpers/kafka-observer.ts new file mode 100644 index 0000000000000..ec5233e72a798 --- /dev/null +++ b/plugin-server/tests/cdp/helpers/kafka-observer.ts @@ -0,0 +1,67 @@ +import { KafkaConsumer, Message } from 'node-rdkafka' + +import { createRdConnectionConfigFromEnvVars } from '../../../src/kafka/config' +import { createKafkaConsumer } from '../../../src/kafka/consumer' +import { Hub } from '../../../src/types' +import { delay } from '../../../src/utils/utils' + +export type TestKafkaObserver = { + messages: { + topic: string + value: any + }[] + consumer: KafkaConsumer + stop: () => Promise + expectMessageCount: (count: number) => Promise +} + +export const createKafkaObserver = async (hub: Hub, topics: string[]): Promise => { + const consumer = await createKafkaConsumer({ + ...createRdConnectionConfigFromEnvVars(hub), + 'group.id': 'test-group', + }) + + consumer.connect() + consumer.subscribe(topics) + const messages: { + topic: string + value: any + }[] = [] + + const poll = async () => { + await delay(50) + if (!consumer.isConnected()) { + return + } + const newMessages = await new Promise((res, rej) => + consumer.consume(10, (err, messages) => (err ? rej(err) : res(messages))) + ) + + messages.push( + ...newMessages.map((message) => ({ + topic: message.topic, + value: JSON.parse(message.value?.toString() ?? ''), + })) + ) + poll() + } + + poll() + + return { + messages, + consumer, + stop: () => new Promise((res) => consumer.disconnect(res)), + expectMessageCount: async (count: number): Promise => { + const timeout = 5000 + const now = Date.now() + while (messages.length < count && Date.now() - now < timeout) { + await delay(100) + } + + if (messages.length < count) { + throw new Error(`Expected ${count} messages, got ${messages.length}`) + } + }, + } +} diff --git a/plugin-server/tests/helpers/expectations.ts b/plugin-server/tests/helpers/expectations.ts new file mode 100644 index 0000000000000..6a4dcf9b3cc53 --- /dev/null +++ b/plugin-server/tests/helpers/expectations.ts @@ -0,0 +1,17 @@ +export const waitForExpect = async (fn: () => T | Promise, timeout = 10_000, interval = 1_000): Promise => { + // Allows for running expectations that are expected to pass eventually. + // This is useful for, e.g. waiting for events to have been ingested into + // the database. + + const start = Date.now() + while (true) { + try { + return await fn() + } catch (error) { + if (Date.now() - start > timeout) { + throw error + } + await new Promise((resolve) => setTimeout(resolve, interval)) + } + } +} From 98d7dd466f29d277e5f28caa63b3ef5196c02b0f Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 13:59:34 +0200 Subject: [PATCH 22/75] Changes all round --- plugin-server/package.json | 3 +- plugin-server/src/cdp/cdp-consumers.ts | 259 +++++++++++------- plugin-server/src/cdp/hog-function-manager.ts | 4 + plugin-server/src/cdp/utils.ts | 12 + plugin-server/src/config/config.ts | 1 - plugin-server/src/types.ts | 1 - plugin-server/tests/cdp/cdp-e2e.test.ts | 130 +++++---- .../cdp/cdp-processed-events-consumer.test.ts | 10 +- .../tests/cdp/helpers/kafka-observer.ts | 4 +- rust/bin/migrate-cyclotron-test | 7 + rust/cyclotron-node/src/helpers.ts | 30 ++ rust/cyclotron-node/src/index.ts | 211 +------------- rust/cyclotron-node/src/manager.ts | 41 +++ rust/cyclotron-node/src/types.ts | 45 +++ rust/cyclotron-node/src/worker.ts | 90 ++++++ 15 files changed, 474 insertions(+), 374 deletions(-) create mode 100755 rust/bin/migrate-cyclotron-test create mode 100644 rust/cyclotron-node/src/helpers.ts create mode 100644 rust/cyclotron-node/src/manager.ts create mode 100644 rust/cyclotron-node/src/types.ts create mode 100644 rust/cyclotron-node/src/worker.ts diff --git a/plugin-server/package.json b/plugin-server/package.json index 654e9886b655d..4aeed2450c4c0 100644 --- a/plugin-server/package.json +++ b/plugin-server/package.json @@ -23,7 +23,8 @@ "prettier:check": "prettier --check .", "prepublishOnly": "pnpm build", "setup:dev:clickhouse": "cd .. && DEBUG=1 python manage.py migrate_clickhouse", - "setup:test": "cd .. && TEST=1 python manage.py setup_test_environment", + "setup:test": "cd .. && TEST=1 python manage.py setup_test_environment && pnpm setup:test:cyclotron", + "setup:test:cyclotron": "cd ../rust && ./bin/migrate-cyclotron-test", "services:start": "cd .. && docker compose -f docker-compose.dev.yml up", "services:stop": "cd .. && docker compose -f docker-compose.dev.yml down", "services:clean": "cd .. && docker compose -f docker-compose.dev.yml rm -v", diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 215dda1683db4..98f9835a63364 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -1,4 +1,4 @@ -import cyclotron from '@posthog/cyclotron' +import { CyclotronManager, CyclotronWorker } from '@posthog/cyclotron' import { captureException } from '@sentry/node' import { Message } from 'node-rdkafka' import { Counter, Histogram } from 'prom-client' @@ -20,7 +20,7 @@ import { createKafkaProducerWrapper } from '../utils/db/hub' import { KafkaProducerWrapper } from '../utils/db/kafka-producer-wrapper' import { captureTeamEvent } from '../utils/posthog' import { status } from '../utils/status' -import { castTimestampOrNow } from '../utils/utils' +import { castTimestampOrNow, delay } from '../utils/utils' import { RustyHook } from '../worker/rusty-hook' import { FetchExecutor } from './fetch-executor' import { GroupsManager } from './groups-manager' @@ -32,6 +32,7 @@ import { CdpRedis, createCdpRedisPool } from './redis' import { HogFunctionInvocation, HogFunctionInvocationGlobals, + HogFunctionInvocationQueueParameters, HogFunctionInvocationResult, HogFunctionInvocationSerialized, HogFunctionInvocationSerializedCompressed, @@ -45,6 +46,7 @@ import { createInvocation, gzipObject, prepareLogEntriesForClickhouse, + serializeHogFunctionInvocation, unGzipObject, } from './utils' @@ -86,8 +88,6 @@ abstract class CdpConsumerBase { messagesToProduce: HogFunctionMessageToProduce[] = [] redis: CdpRedis - private cyclotronMatcher: ValueMatcher - protected kafkaProducer?: KafkaProducerWrapper protected abstract name: string @@ -104,11 +104,6 @@ abstract class CdpConsumerBase { const rustyHook = this.hub?.rustyHook ?? new RustyHook(this.hub) this.fetchExecutor = new FetchExecutor(this.hub, rustyHook) this.groupsManager = new GroupsManager(this.hub) - this.cyclotronMatcher = buildIntegerMatcher(hub.CDP_CYCLOTRON_ENABLED_TEAMS, false) - } - - protected cyclotronEnabled(invocation: HogFunctionInvocation): boolean { - return !!(this.hub.CYCLOTRON_DATABASE_URL && this.cyclotronMatcher(invocation.globals.project.id)) } private async captureInternalPostHogEvent( @@ -152,8 +147,6 @@ abstract class CdpConsumerBase { return results } - protected abstract _handleKafkaBatch(messages: Message[]): Promise - protected async produceQueuedMessages() { const messages = [...this.messagesToProduce] this.messagesToProduce = [] @@ -201,69 +194,21 @@ abstract class CdpConsumerBase { }) } - protected async queueInvocations(invocation: HogFunctionInvocation[]) { + // NOTE: These will be removed once we are only on Cyclotron + protected async queueInvocationsToKafka(invocation: HogFunctionInvocation[]) { await Promise.all( invocation.map(async (item) => { - await this.queueInvocation(item) + await this.queueInvocationToKafka(item) }) ) } - protected async queueInvocation(invocation: HogFunctionInvocation) { + protected async queueInvocationToKafka(invocation: HogFunctionInvocation) { // TODO: Add cyclotron check here and enqueue that way // For now we just enqueue to kafka // For kafka style this is overkill to enqueue this way but it simplifies migrating to the new system - const serializedInvocation: HogFunctionInvocationSerialized = { - ...invocation, - hogFunctionId: invocation.hogFunction.id, - } - - delete (serializedInvocation as any).hogFunction - - if (this.cyclotronEnabled(invocation)) { - // Cyclotron enabled - if (!invocation.vmState) { - // TODO: Figure out how to convert this effectively - // id: string - // globals: HogFunctionInvocationGlobals - // teamId: Team['id'] - // hogFunction: HogFunctionType - // priority: number - // queue: 'hog' | 'fetch' - // queueParameters?: HogFunctionInvocationQueueParameters - // // The current vmstate (set if the invocation is paused) - // vmState?: VMState - // timings: HogFunctionTiming[] - - const job = await cyclotron.createJob({ - teamId: invocation.globals.project.id, - functionId: invocation.hogFunction.id, - queueName: invocation.queue, - parameters: invocation.queueParameters ? JSON.stringify(invocation.queueParameters) : undefined, - // queueBlob: invocation.blob, - priority: invocation.priority, - vmState: JSON.stringify(serializedInvocation), // TODO: This doesn't feel right but we need timings, globals and vmstate to all be somewhere :thinking: - }) - - console.log('Created job', job) - } else { - // Ideally we could just have an "upsertJob" method or something... - cyclotron.setQueue(invocation.id, invocation.queue) - cyclotron.setVmState(invocation.id, serializedInvocation) - cyclotron.setPriority(invocation.id, invocation.priority) - cyclotron.setParameters(invocation.id, invocation.queueParameters ?? null) - - // await cyclotron.updateJob(invocation.id, { - // queue: invocation.queue, - // parameters: invocation.queueParameters ? JSON.stringify(invocation.queueParameters) : undefined, - // priority: invocation.priority, - // vmState: JSON.stringify(serializedInvocation), - // }) - } - - return - } + const serializedInvocation = serializeHogFunctionInvocation(invocation) const request: HogFunctionInvocationSerializedCompressed = { state: await gzipObject(serializedInvocation), @@ -279,10 +224,10 @@ abstract class CdpConsumerBase { } protected async processInvocationResults(results: HogFunctionInvocationResult[]): Promise { - await runInstrumentedFunction({ + return await runInstrumentedFunction({ statsKey: `cdpConsumer.handleEachBatch.produceResults`, func: async () => { - console.log('Processing invocations results', results.length) + await this.hogWatcher.observeResults(results) await Promise.all( results.map(async (result) => { @@ -314,18 +259,17 @@ abstract class CdpConsumerBase { key: `${team!.api_token}:${event.distinct_id}`, }) } - - if (!result.finished) { - // If it isn't finished then we need to put it back on the queue - await this.queueInvocation(result.invocation) - } }) ) }, }) } - protected async startKafkaConsumer(options: { topic: string; groupId: string }): Promise { + protected async startKafkaConsumer(options: { + topic: string + groupId: string + handleBatch: (messages: Message[]) => Promise + }): Promise { this.batchConsumer = await startBatchConsumer({ ...options, connectionConfig: createRdConnectionConfigFromEnvVars(this.hub), @@ -359,7 +303,7 @@ abstract class CdpConsumerBase { statsKey: `cdpConsumer.handleEachBatch`, sendTimeoutGuardToSentry: false, func: async () => { - await this._handleKafkaBatch(messages) + await options.handleBatch(messages) }, }) }, @@ -383,13 +327,11 @@ abstract class CdpConsumerBase { // NOTE: This is only for starting shared services await Promise.all([ this.hogFunctionManager.start(), - this.hub.CYCLOTRON_DATABASE_URL - ? cyclotron.initManager({ shards: [{ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }] }) - : Promise.resolve(), + createKafkaProducerWrapper(this.hub).then((producer) => { + this.kafkaProducer = producer + this.kafkaProducer.producer.connect() + }), ]) - - this.kafkaProducer = await createKafkaProducerWrapper(this.hub) - this.kafkaProducer.producer.connect() } public async stop(): Promise { @@ -416,10 +358,21 @@ abstract class CdpConsumerBase { /** * This consumer handles incoming events from the main clickhouse topic + * Currently it produces to both kafka and Cyclotron based on the team */ - export class CdpProcessedEventsConsumer extends CdpConsumerBase { protected name = 'CdpProcessedEventsConsumer' + private cyclotronMatcher: ValueMatcher + private cyclotronManager?: CyclotronManager + + constructor(hub: Hub) { + super(hub) + this.cyclotronMatcher = buildIntegerMatcher(hub.CDP_CYCLOTRON_ENABLED_TEAMS, true) + } + + private cyclotronEnabled(invocation: HogFunctionInvocation): boolean { + return !!(this.cyclotronManager && this.cyclotronMatcher(invocation.globals.project.id)) + } public async processBatch(invocationGlobals: HogFunctionInvocationGlobals[]): Promise { if (!invocationGlobals.length) { @@ -430,9 +383,35 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { this.createHogFunctionInvocations(invocationGlobals) ) - if (this.hub.CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP) { - // NOTE: This is for testing the two ways of enqueueing processing. It will be swapped out for a cyclotron env check - // Kafka based workflow + // Split out the cyclotron invocations + const [cyclotronInvocations, kafkaInvocations] = invocationsToBeQueued.reduce( + (acc, item) => { + if (this.cyclotronEnabled(item)) { + acc[0].push(item) + } else { + acc[1].push(item) + } + + return acc + }, + [[], []] as [HogFunctionInvocation[], HogFunctionInvocation[]] + ) + + // For the cyclotron ones we simply create the jobs + await Promise.all( + cyclotronInvocations.map((item) => + this.cyclotronManager?.createJob({ + teamId: item.globals.project.id, + functionId: item.hogFunction.id, + queueName: 'hog', + priority: item.priority, + vmState: serializeHogFunctionInvocation(item), + }) + ) + ) + + if (kafkaInvocations.length) { + // As we don't want to over-produce to kafka we invoke the hog functions and then queue the results const invocationResults = await runInstrumentedFunction({ statsKey: `cdpConsumer.handleEachBatch.executeInvocations`, func: async () => { @@ -443,10 +422,9 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { }, }) - await this.hogWatcher.observeResults(invocationResults) await this.processInvocationResults(invocationResults) - } else { - await this.queueInvocations(invocationsToBeQueued) + const newInvocations = invocationResults.filter((r) => !r.finished).map((r) => r.invocation) + await this.queueInvocationsToKafka(newInvocations) } await this.produceQueuedMessages() @@ -457,7 +435,6 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { /** * Finds all matching hog functions for the given globals. * Filters them for their disabled state as well as masking configs - * */ protected async createHogFunctionInvocations( invocationGlobals: HogFunctionInvocationGlobals[] @@ -583,12 +560,19 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { await this.startKafkaConsumer({ topic: KAFKA_EVENTS_JSON, groupId: 'cdp-processed-events-consumer', + handleBatch: (messages) => this._handleKafkaBatch(messages), }) + + this.cyclotronManager = this.hub.CYCLOTRON_DATABASE_URL + ? new CyclotronManager({ shards: [{ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }] }) + : undefined + + await this.cyclotronManager?.connect() } } /** - * This consumer handles actually invoking hog in a loop + * This consumer only deals with kafka messages and will eventually be replaced by the Cyclotron worker */ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { protected name = 'CdpFunctionCallbackConsumer' @@ -614,8 +598,9 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { }, }) - await this.hogWatcher.observeResults(invocationResults) await this.processInvocationResults(invocationResults) + const newInvocations = invocationResults.filter((r) => !r.finished).map((r) => r.invocation) + await this.queueInvocationsToKafka(newInvocations) await this.produceQueuedMessages() } @@ -701,27 +686,87 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { await this.startKafkaConsumer({ topic: KAFKA_CDP_FUNCTION_CALLBACKS, groupId: 'cdp-function-callback-consumer', + handleBatch: (messages) => this._handleKafkaBatch(messages), }) } } -export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { +/** + * The future of the CDP consumer. This will be the main consumer that will handle all hog jobs from Cyclotron + */ +export class CdpCyclotronWorker extends CdpConsumerBase { protected name = 'CdpCyclotronWorker' - + private cyclotronWorker?: CyclotronWorker private runningWorker: Promise | undefined private isUnhealthy = false + protected queue: 'hog' | 'fetch' = 'hog' + protected limit = 100 + + public async processBatch(invocations: HogFunctionInvocation[]): Promise { + if (!invocations.length) { + return + } + + const invocationResults = await runInstrumentedFunction({ + statsKey: `cdpConsumer.handleEachBatch.executeInvocations`, + func: async () => { + // NOTE: In the future this service will never do fetching (unless we decide we want to do it in node at some point) + // This is just "for now" to support the transition to cyclotron + console.log('processing invocations', invocations) + const fetchQueue = invocations.filter((item) => item.queue === 'fetch') + const fetchResults = await this.runManyWithHeartbeat(fetchQueue, (item) => + this.fetchExecutor.execute(item) + ) + + const hogQueue = invocations.filter((item) => item.queue === 'hog') + const hogResults = await this.runManyWithHeartbeat(hogQueue, (item) => this.hogExecutor.execute(item)) + return [...hogResults, ...(fetchResults.filter(Boolean) as HogFunctionInvocationResult[])] + }, + }) + + console.log('invocationResults', invocationResults) + + await this.processInvocationResults(invocationResults) + await this.updateJobs(invocationResults) + await this.produceQueuedMessages() + } + + private async updateJobs(invocations: HogFunctionInvocationResult[]) { + await Promise.all( + invocations.map(async (item) => { + const id = item.invocation.id + if (item.finished) { + console.log('Updating job to completed', id) + this.cyclotronWorker?.updateJob(id, 'completed') + } else { + console.log('Updating job to available', id) + this.cyclotronWorker?.updateJob(id, 'available', { + priority: item.invocation.priority, + vmState: item.invocation, + queueName: item.invocation.queue, + parameters: item.invocation.queueParameters ?? null, + }) + } + await this.cyclotronWorker?.flushJob(id) + }) + ) + } private async innerStart() { try { - const limit = 100 // TODO: Make configurable. while (!this.isStopping) { // TODO: Add a timeout check - await cyclotron.dequeueJobsWithVmState('hog', limit, (jobs) => {}) - - const jobs = await cyclotron.dequeueJobsWithVmState('hog', limit) + console.log('Dequeueing jobs') + const jobs = await this.cyclotronWorker!.dequeueJobsWithVmState(this.queue, this.limit) // TODO: How do we "hold" these dequeued jobs? const invocations: HogFunctionInvocation[] = [] + if (!jobs.length) { + await delay(100) + return + } + console.log('Dequeued jobs', this.queue, jobs) + for (const job of jobs) { // NOTE: This is all a bit messy and might be better to refactor into a helper if (!job.functionId) { @@ -730,13 +775,15 @@ export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { const hogFunction = this.hogFunctionManager.getHogFunction(job.functionId) if (!hogFunction) { + // Here we need to mark the job as failed + status.error('Error finding hog function', { id: job.functionId, }) - return + continue } - const parsedState = JSON.parse(job.metadata!) as HogFunctionInvocationSerialized + const parsedState = job.vmState as HogFunctionInvocationSerialized // TODO: Should ID come from the job or the state? invocations.push({ @@ -745,10 +792,10 @@ export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { teamId: hogFunction.team_id, hogFunction, priority: job.priority, - queue: job.queueName ?? 'hog', - queueParameters: job.parameters ? JSON.parse(job.parameters) : undefined, + queue: (job.queueName as any) ?? 'hog', + queueParameters: job.parameters as HogFunctionInvocationQueueParameters | undefined, vmState: parsedState.vmState, - timings: [], + timings: parsedState.timings, }) } @@ -759,11 +806,15 @@ export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { console.error('Error in Cyclotron worker', err) throw err } + + console.log('Cyclotron worker stopped') } public async start() { await super.start() - await cyclotron.initWorker({ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }) + + this.cyclotronWorker = new CyclotronWorker({ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }) + await this.cyclotronWorker.connect() // Consumer `start` expects an async task is started, and not that `start` itself blocks // indefinitely. @@ -774,6 +825,7 @@ export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { public async stop() { await super.stop() + // this.cyclotronWorker.disconnect() await this.runningWorker } @@ -781,3 +833,8 @@ export class CdpCyclotronWorker extends CdpFunctionCallbackConsumer { return this.isUnhealthy } } + +export class CdpCyclotronWorkerFetch extends CdpCyclotronWorker { + protected name = 'CdpCyclotronWorkerFetch' + protected queue = 'fetch' as const +} diff --git a/plugin-server/src/cdp/hog-function-manager.ts b/plugin-server/src/cdp/hog-function-manager.ts index d356e6d66ce10..389befec7bbe9 100644 --- a/plugin-server/src/cdp/hog-function-manager.ts +++ b/plugin-server/src/cdp/hog-function-manager.ts @@ -95,6 +95,8 @@ export class HogFunctionManager { if (!this.ready) { throw new Error('HogFunctionManager is not ready! Run HogFunctionManager.start() before this') } + console.log(this.cache.functions) + return this.cache.functions[id] } @@ -102,6 +104,7 @@ export class HogFunctionManager { if (!this.ready) { throw new Error('HogFunctionManager is not ready! Run HogFunctionManager.start() before this') } + const fn = this.cache.functions[hogFunctionId] if (fn?.team_id === teamId) { return fn @@ -141,6 +144,7 @@ export class HogFunctionManager { this.cache = cache status.info('🍿', 'Fetched all hog functions from DB anew') + console.log('Fetched all hog functions from DB anew') } public async reloadHogFunctions(teamId: Team['id'], ids: HogFunctionType['id'][]): Promise { diff --git a/plugin-server/src/cdp/utils.ts b/plugin-server/src/cdp/utils.ts index 934afc968e2fb..22d7dcb57f7b3 100644 --- a/plugin-server/src/cdp/utils.ts +++ b/plugin-server/src/cdp/utils.ts @@ -12,6 +12,7 @@ import { HogFunctionInvocation, HogFunctionInvocationGlobals, HogFunctionInvocationResult, + HogFunctionInvocationSerialized, HogFunctionLogEntrySerialized, HogFunctionType, ParsedClickhouseEvent, @@ -225,3 +226,14 @@ export function createInvocation( timings: [], } } + +export function serializeHogFunctionInvocation(invocation: HogFunctionInvocation): HogFunctionInvocationSerialized { + const serializedInvocation: HogFunctionInvocationSerialized = { + ...invocation, + hogFunctionId: invocation.hogFunction.id, + } + + delete (serializedInvocation as any).hogFunction + + return serializedInvocation +} diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index d68a0f7f21d9e..d5f391d9e1292 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -189,7 +189,6 @@ export function getDefaultConfig(): PluginsServerConfig { CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS: '', CDP_CYCLOTRON_ENABLED_TEAMS: '', CDP_REDIS_PASSWORD: '', - CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP: true, CDP_REDIS_HOST: '', CDP_REDIS_PORT: 6479, diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index 6f9204d8d2907..8b8ccfcca357c 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -111,7 +111,6 @@ export type CdpConfig = { CDP_REDIS_HOST: string CDP_REDIS_PORT: number CDP_REDIS_PASSWORD: string - CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP: boolean } export interface PluginsServerConfig extends CdpConfig { diff --git a/plugin-server/tests/cdp/cdp-e2e.test.ts b/plugin-server/tests/cdp/cdp-e2e.test.ts index da90bd9a58e35..52d545d1f0449 100644 --- a/plugin-server/tests/cdp/cdp-e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-e2e.test.ts @@ -1,4 +1,9 @@ -import { CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../../src/cdp/cdp-consumers' +import { + CdpCyclotronWorker, + CdpCyclotronWorkerFetch, + CdpFunctionCallbackConsumer, + CdpProcessedEventsConsumer, +} from '../../src/cdp/cdp-consumers' import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' import { KAFKA_APP_METRICS_2, KAFKA_LOG_ENTRIES } from '../../src/config/kafka-topics' import { Hub, Team } from '../../src/types' @@ -24,64 +29,63 @@ jest.mock('../../src/utils/fetch', () => { const mockFetch: jest.Mock = require('../../src/utils/fetch').trackedFetch describe('CDP E2E', () => { - let processedEventsConsumer: CdpProcessedEventsConsumer - let functionProcessor: CdpFunctionCallbackConsumer - let hub: Hub - let closeHub: () => Promise - let team: Team - let kafkaObserver: TestKafkaObserver - - const insertHogFunction = async (hogFunction: Partial) => { - const item = await _insertHogFunction(hub.postgres, team.id, hogFunction) - // Trigger the reload that django would do - await processedEventsConsumer.hogFunctionManager.reloadAllHogFunctions() - await functionProcessor.hogFunctionManager.reloadAllHogFunctions() - return item - } - - beforeEach(async () => { - await resetTestDatabase() - ;[hub, closeHub] = await createHub() - team = await getFirstTeam(hub) - - kafkaObserver = await createKafkaObserver(hub, [KAFKA_APP_METRICS_2, KAFKA_LOG_ENTRIES]) - - processedEventsConsumer = new CdpProcessedEventsConsumer(hub) - await processedEventsConsumer.start() - functionProcessor = new CdpFunctionCallbackConsumer(hub) - await functionProcessor.start() - - mockFetch.mockClear() - }) - - afterEach(async () => { - try { - await Promise.all([processedEventsConsumer.stop(), functionProcessor.stop(), kafkaObserver.stop()]) - await closeHub() - } catch (e) { - console.error('Error in afterEach:', e) - } - }) - - afterAll(() => { - jest.useRealTimers() - }) - - describe.each(['kafka', 'cyclotron'])('e2e fetch call: %s', (mode) => { - /** - * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios - */ - + describe.each([ + // 'kafka', + 'cyclotron', + ])('e2e fetch call: %s', (mode) => { + let processedEventsConsumer: CdpProcessedEventsConsumer + let functionProcessor: CdpFunctionCallbackConsumer + let cyclotronWorker: CdpCyclotronWorker | undefined + let cyclotronFetchWorker: CdpCyclotronWorkerFetch | undefined + let hub: Hub + let closeHub: () => Promise + let team: Team + let kafkaObserver: TestKafkaObserver let fnFetchNoFilters: HogFunctionType let globals: HogFunctionInvocationGlobals + const insertHogFunction = async (hogFunction: Partial) => { + const item = await _insertHogFunction(hub.postgres, team.id, hogFunction) + // Trigger the reload that django would do + // await processedEventsConsumer.hogFunctionManager.reloadAllHogFunctions() + // await functionProcessor.hogFunctionManager.reloadAllHogFunctions() + // await cyclotronWorker?.hogFunctionManager.reloadAllHogFunctions() + // await cyclotronFetchWorker?.hogFunctionManager.reloadAllHogFunctions() + return item + } + beforeEach(async () => { + await resetTestDatabase() + ;[hub, closeHub] = await createHub() + team = await getFirstTeam(hub) + fnFetchNoFilters = await insertHogFunction({ ...HOG_EXAMPLES.simple_fetch, ...HOG_INPUTS_EXAMPLES.simple_fetch, ...HOG_FILTERS_EXAMPLES.no_filters, }) + console.log(fnFetchNoFilters.id) + + if (mode === 'cyclotron') { + hub.CDP_CYCLOTRON_ENABLED_TEAMS = '*' + hub.CYCLOTRON_DATABASE_URL = 'postgres://posthog:posthog@localhost:5432/test_cyclotron' + } + + kafkaObserver = await createKafkaObserver(hub, [KAFKA_APP_METRICS_2, KAFKA_LOG_ENTRIES]) + + processedEventsConsumer = new CdpProcessedEventsConsumer(hub) + await processedEventsConsumer.start() + functionProcessor = new CdpFunctionCallbackConsumer(hub) + await functionProcessor.start() + + if (mode === 'cyclotron') { + cyclotronWorker = new CdpCyclotronWorker(hub) + await cyclotronWorker.start() + cyclotronFetchWorker = new CdpCyclotronWorkerFetch(hub) + await cyclotronFetchWorker.start() + } + globals = createHogExecutionGlobals({ project: { id: team.id, @@ -97,13 +101,33 @@ describe('CDP E2E', () => { } as any, }) - if (mode === 'cyclotron') { - hub.CDP_CYCLOTRON_ENABLED_TEAMS = '*' - hub.CYCLOTRON_DATABASE_URL = 'postgres://localhost:5432/test_cyclotron' + mockFetch.mockClear() + }) + + afterEach(async () => { + try { + await Promise.all([ + processedEventsConsumer?.stop(), + functionProcessor?.stop(), + kafkaObserver?.stop(), + cyclotronWorker?.stop(), + cyclotronFetchWorker?.stop(), + ]) + await closeHub() + } catch (e) { + console.error('Error in afterEach:', e) } }) - it('should invoke a function via kafka transportation until completed', async () => { + afterAll(() => { + jest.useRealTimers() + }) + + /** + * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios + */ + + it('should invoke a function in the worker loop until completed', async () => { // NOTE: We can skip kafka as the entry point const invocations = await processedEventsConsumer.processBatch([globals]) expect(invocations).toHaveLength(1) diff --git a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts index 713a478ac1b14..03ff876d25b78 100644 --- a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts +++ b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts @@ -114,10 +114,6 @@ describe('CDP Processed Events Consumer', () => { }) describe('general event processing', () => { - beforeEach(() => { - hub.CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP = false - }) - describe('common processing', () => { let fnFetchNoFilters: HogFunctionType let fnPrinterPageviewFilters: HogFunctionType @@ -297,9 +293,9 @@ describe('CDP Processed Events Consumer', () => { }) describe('no delayed execution', () => { - beforeEach(() => { - hub.CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP = true - }) + // beforeEach(() => { + // hub.CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP = true + // }) it('should invoke the initial function before enqueuing', async () => { await insertHogFunction({ diff --git a/plugin-server/tests/cdp/helpers/kafka-observer.ts b/plugin-server/tests/cdp/helpers/kafka-observer.ts index ec5233e72a798..5808fa0ca3cda 100644 --- a/plugin-server/tests/cdp/helpers/kafka-observer.ts +++ b/plugin-server/tests/cdp/helpers/kafka-observer.ts @@ -3,7 +3,7 @@ import { KafkaConsumer, Message } from 'node-rdkafka' import { createRdConnectionConfigFromEnvVars } from '../../../src/kafka/config' import { createKafkaConsumer } from '../../../src/kafka/consumer' import { Hub } from '../../../src/types' -import { delay } from '../../../src/utils/utils' +import { delay, UUIDT } from '../../../src/utils/utils' export type TestKafkaObserver = { messages: { @@ -18,7 +18,7 @@ export type TestKafkaObserver = { export const createKafkaObserver = async (hub: Hub, topics: string[]): Promise => { const consumer = await createKafkaConsumer({ ...createRdConnectionConfigFromEnvVars(hub), - 'group.id': 'test-group', + 'group.id': `test-group-${new UUIDT().toString()}`, }) consumer.connect() diff --git a/rust/bin/migrate-cyclotron-test b/rust/bin/migrate-cyclotron-test new file mode 100755 index 0000000000000..04a139c8d9ba9 --- /dev/null +++ b/rust/bin/migrate-cyclotron-test @@ -0,0 +1,7 @@ +#!/bin/sh + +export DATABASE_NAME=${DATABASE_NAME:-test_cyclotron} +export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} + +sqlx database create -D "$DATABASE_URL" +sqlx migrate run -D "$DATABASE_URL" --source ./cyclotron-core/migrations diff --git a/rust/cyclotron-node/src/helpers.ts b/rust/cyclotron-node/src/helpers.ts new file mode 100644 index 0000000000000..a26f788da5e5b --- /dev/null +++ b/rust/cyclotron-node/src/helpers.ts @@ -0,0 +1,30 @@ +import { InternalPoolConfig, PoolConfig } from './types' + +export function convertToInternalPoolConfig(poolConfig: PoolConfig): InternalPoolConfig { + return { + db_url: poolConfig.dbUrl, + max_connections: poolConfig.maxConnections, + min_connections: poolConfig.minConnections, + acquire_timeout_seconds: poolConfig.acquireTimeoutSeconds, + max_lifetime_seconds: poolConfig.maxLifetimeSeconds, + idle_timeout_seconds: poolConfig.idleTimeoutSeconds, + } +} + +export function serializeObject(name: string, obj: Record | null): string | null { + if (obj === null) { + return null + } else if (typeof obj === 'object' && obj !== null) { + return JSON.stringify(obj) + } + throw new Error(`${name} must be either an object or null`) +} + +export function deserializeObject(name: string, str: any): Record | null { + if (str === null) { + return null + } else if (typeof str === 'string') { + return JSON.parse(str) + } + throw new Error(`${name} must be either a string or null`) +} diff --git a/rust/cyclotron-node/src/index.ts b/rust/cyclotron-node/src/index.ts index d14a14868cbab..e905c5f6cd4ad 100644 --- a/rust/cyclotron-node/src/index.ts +++ b/rust/cyclotron-node/src/index.ts @@ -1,208 +1,3 @@ -// eslint-disable-next-line @typescript-eslint/no-var-requires -const cyclotron = require('../index.node') - -export type PoolConfig = { - dbUrl: string - maxConnections?: number - minConnections?: number - acquireTimeoutSeconds?: number - maxLifetimeSeconds?: number - idleTimeoutSeconds?: number -} - -// Type as expected by Cyclotron. -type InternalPoolConfig = { - db_url: string - max_connections?: number - min_connections?: number - acquire_timeout_seconds?: number - max_lifetime_seconds?: number - idle_timeout_seconds?: number -} - -export type ManagerConfig = { - shards: PoolConfig[] -} - -// Type as expected by Cyclotron. -type InternalManagerConfig = { - shards: InternalPoolConfig[] -} - -// Type as expected by Cyclotron. -type InternalJobInit = { - team_id: number - function_id: string - queue_name: string - priority?: number - scheduled?: Date - vm_state?: string - parameters?: string - metadata?: string -} - -export type JobState = 'available' | 'running' | 'completed' | 'failed' | 'paused' - -export type Job = { - id: string - teamId: number - functionId: string | null - created: Date - lockId: string | null - lastHeartbeat: Date | null - janitorTouchCount: number - transitionCount: number - lastTransition: Date - queueName: string - state: JobState - priority: number - scheduled: Date - vmState: string | null - metadata: string | null - parameters: string | null - blob: Uint8Array | null -} - -export type JobInit = { - teamId: number - functionId: string - queueName: string - priority?: number - scheduled?: Date - vmState?: string - parameters?: string - blob?: Uint8Array - metadata?: string -} - -// helpers - -function serializeObject(name: string, obj: Record | null): string | null { - if (obj === null) { - return null - } else if (typeof obj === 'object' && obj !== null) { - return JSON.stringify(obj) - } - throw new Error(`${name} must be either an object or null`) -} - -function convertToInternalPoolConfig(poolConfig: PoolConfig): InternalPoolConfig { - return { - db_url: poolConfig.dbUrl, - max_connections: poolConfig.maxConnections, - min_connections: poolConfig.minConnections, - acquire_timeout_seconds: poolConfig.acquireTimeoutSeconds, - max_lifetime_seconds: poolConfig.maxLifetimeSeconds, - idle_timeout_seconds: poolConfig.idleTimeoutSeconds, - } -} - -// Management API -async function initWorker(poolConfig: PoolConfig): Promise { - return await cyclotron.initWorker(JSON.stringify(convertToInternalPoolConfig(poolConfig))) -} - -async function initManager(managerConfig: ManagerConfig): Promise { - const managerConfigInternal: InternalManagerConfig = { - shards: managerConfig.shards.map((shard) => convertToInternalPoolConfig(shard)), - } - return await cyclotron.initManager(JSON.stringify(managerConfigInternal)) -} - -async function maybeInitWorker(poolConfig: PoolConfig): Promise { - return await cyclotron.maybeInitWorker(JSON.stringify(convertToInternalPoolConfig(poolConfig))) -} - -async function maybeInitManager(managerConfig: ManagerConfig): Promise { - const managerConfigInternal: InternalManagerConfig = { - shards: managerConfig.shards.map((shard) => convertToInternalPoolConfig(shard)), - } - return await cyclotron.maybeInitManager(JSON.stringify(managerConfigInternal)) -} - -async function dequeueJobs(queueName: string, limit: number): Promise { - return await cyclotron.dequeueJobs(queueName, limit) -} - -async function dequeueJobsWithVmState(queueName: string, limit: number): Promise { - return await cyclotron.dequeueJobsWithVmState(queueName, limit) -} - -async function flushJob(jobId: string): Promise { - return await cyclotron.flushJob(jobId) -} - -// Job API -async function createJob(job: JobInit): Promise { - job.priority ??= 1 - job.scheduled ??= new Date() - - const jobInitInternal: InternalJobInit = { - team_id: job.teamId, - function_id: job.functionId, - queue_name: job.queueName, - priority: job.priority, - scheduled: job.scheduled, - vm_state: job.vmState, - parameters: job.parameters, - metadata: job.metadata, - } - - const json = JSON.stringify(jobInitInternal) - return await cyclotron.createJob(json, job.blob ? job.blob.buffer : undefined) -} - -// TODO: Remove promise type returns -function setState(jobId: string, jobState: JobState): void { - return cyclotron.setState(jobId, jobState) -} - -function setQueue(jobId: string, queueName: string): void { - return cyclotron.setQueue(jobId, queueName) -} - -function setPriority(jobId: string, priority: number): void { - return cyclotron.setPriority(jobId, priority) -} - -function setScheduledAt(jobId: string, scheduledAt: Date): void { - return cyclotron.setScheduledAt(jobId, scheduledAt.toISOString()) -} - -function setVmState(jobId: string, vmState: Record | null): void { - const serialized = serializeObject('vmState', vmState) - return cyclotron.setVmState(jobId, serialized) -} - -function setMetadata(jobId: string, metadata: Record | null): void { - const serialized = serializeObject('metadata', metadata) - return cyclotron.setMetadata(jobId, serialized) -} - -function setParameters(jobId: string, parameters: Record | null): void { - const serialized = serializeObject('parameters', parameters) - return cyclotron.setParameters(jobId, serialized) -} - -function setBlob(jobId: string, blob: Uint8Array | null): void { - return cyclotron.setBlob(jobId, blob) -} - -export default { - initWorker, - initManager, - maybeInitWorker, - maybeInitManager, - createJob, - dequeueJobs, - dequeueJobsWithVmState, - flushJob, - setState, - setQueue, - setPriority, - setScheduledAt, - setVmState, - setMetadata, - setParameters, - setBlob, -} +export * from './manager' +export * from './types' +export * from './worker' diff --git a/rust/cyclotron-node/src/manager.ts b/rust/cyclotron-node/src/manager.ts new file mode 100644 index 0000000000000..0cffdd750cf01 --- /dev/null +++ b/rust/cyclotron-node/src/manager.ts @@ -0,0 +1,41 @@ +// eslint-disable-next-line @typescript-eslint/no-var-requires +const cyclotron = require('../index.node') + +import { convertToInternalPoolConfig, serializeObject } from './helpers' +import { JobInit, PoolConfig } from './types' + +export class CyclotronManager { + constructor(private config: { shards: PoolConfig[] }) { + this.config = config + } + + async connect(): Promise { + return await cyclotron.maybeInitManager( + JSON.stringify({ + shards: this.config.shards.map((shard) => convertToInternalPoolConfig(shard)), + }) + ) + } + + async createJob(job: JobInit): Promise { + job.priority ??= 1 + job.scheduled ??= new Date() + + // TODO: Why is this type of job snake case whereas the dequeue return type is camel case? + const jobInitInternal = { + team_id: job.teamId, + function_id: job.functionId, + queue_name: job.queueName, + priority: job.priority, + scheduled: job.scheduled, + vm_state: job.vmState ? serializeObject('vmState', job.vmState) : null, + parameters: job.parameters ? serializeObject('parameters', job.parameters) : null, + metadata: job.metadata ? serializeObject('metadata', job.metadata) : null, + } + + console.log('Creating job:', jobInitInternal) + + const json = JSON.stringify(jobInitInternal) + return await cyclotron.createJob(json, job.blob ? job.blob.buffer : undefined) + } +} diff --git a/rust/cyclotron-node/src/types.ts b/rust/cyclotron-node/src/types.ts new file mode 100644 index 0000000000000..4b38657c2ca53 --- /dev/null +++ b/rust/cyclotron-node/src/types.ts @@ -0,0 +1,45 @@ +export type PoolConfig = { + dbUrl: string + maxConnections?: number + minConnections?: number + acquireTimeoutSeconds?: number + maxLifetimeSeconds?: number + idleTimeoutSeconds?: number +} + +// Type as expected by Cyclotron. +export type InternalPoolConfig = { + db_url: string + max_connections?: number + min_connections?: number + acquire_timeout_seconds?: number + max_lifetime_seconds?: number + idle_timeout_seconds?: number +} + +export type JobState = 'available' | 'running' | 'completed' | 'failed' | 'paused' + +export type Job = { + id: string + teamId: number + functionId: string | null + created: Date + lockId: string | null + lastHeartbeat: Date | null + janitorTouchCount: number + transitionCount: number + lastTransition: Date + queueName: string + state: JobState + priority: number + scheduled: Date + vmState: object | null + metadata: object | null + parameters: object | null + blob: Uint8Array | null +} + +export type JobInit = Pick & + Pick, 'scheduled' | 'vmState' | 'parameters' | 'metadata' | 'blob'> + +export type JobUpdate = Pick, 'queueName' | 'priority' | 'vmState' | 'parameters' | 'metadata' | 'blob'> diff --git a/rust/cyclotron-node/src/worker.ts b/rust/cyclotron-node/src/worker.ts new file mode 100644 index 0000000000000..8827b7b615798 --- /dev/null +++ b/rust/cyclotron-node/src/worker.ts @@ -0,0 +1,90 @@ +// eslint-disable-next-line @typescript-eslint/no-var-requires +const cyclotron = require('../index.node') +import { convertToInternalPoolConfig, deserializeObject, serializeObject } from './helpers' +import { Job, JobState, JobUpdate, PoolConfig } from './types' + +const parseJob = (job: Job): Job => { + return { + ...job, + vmState: deserializeObject('vmState', job.vmState), + metadata: deserializeObject('metadata', job.metadata), + parameters: deserializeObject('parameters', job.parameters), + } +} + +export class CyclotronWorker { + constructor(private config: PoolConfig) { + this.config = config + } + + async connect(): Promise { + return await cyclotron.maybeInitWorker(JSON.stringify(convertToInternalPoolConfig(this.config))) + } + + async dequeueJobs(queueName: string, limit: number): Promise { + return (await cyclotron.dequeueJobs(queueName, limit)).map(parseJob) + } + + async dequeueJobsWithVmState(queueName: string, limit: number): Promise { + return (await cyclotron.dequeueJobsWithVmState(queueName, limit)).map(parseJob) + } + + async flushJob(jobId: string): Promise { + return await cyclotron.flushJob(jobId) + } + + updateJob(id: Job['id'], state: JobState, updates?: JobUpdate): void { + cyclotron.setState(id, state) + if (updates?.queueName) { + cyclotron.setQueue(id, updates.queueName) + } + if (updates?.priority) { + cyclotron.setPriority(id, updates.priority) + } + if (updates?.parameters) { + cyclotron.setParameters(id, serializeObject('parameters', updates.parameters)) + } + if (updates?.metadata) { + cyclotron.setMetadata(id, updates.metadata) + } + + if (updates?.vmState) { + cyclotron.setMetadata(id, updates.metadata) + } + } + + // setState(jobId: string, jobState: JobState): void { + // return cyclotron.setState(jobId, jobState) + // } + + // setQueue(jobId: string, queueName: string): void { + // return cyclotron.setQueue(jobId, queueName) + // } + + // setPriority(jobId: string, priority: number): void { + // return cyclotron.setPriority(jobId, priority) + // } + + // setScheduledAt(jobId: string, scheduledAt: Date): void { + // return cyclotron.setScheduledAt(jobId, scheduledAt.toISOString()) + // } + + // setVmState(jobId: string, vmState: Record | null): void { + // const serialized = serializeObject('vmState', vmState) + // return cyclotron.setVmState(jobId, serialized) + // } + + // setMetadata(jobId: string, metadata: Record | null): void { + // const serialized = serializeObject('metadata', metadata) + // return cyclotron.setMetadata(jobId, serialized) + // } + + // setParameters(jobId: string, parameters: Record | null): void { + // const serialized = serializeObject('parameters', parameters) + // return cyclotron.setParameters(jobId, serialized) + // } + + // setBlob(jobId: string, blob: Uint8Array | null): void { + // return cyclotron.setBlob(jobId, blob) + // } +} From eff73c62f61c3a986eaafb01953a1e4e6ae1540f Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 14:12:18 +0200 Subject: [PATCH 23/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 98f9835a63364..7aa9dd15d1a2e 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -755,9 +755,17 @@ export class CdpCyclotronWorker extends CdpConsumerBase { private async innerStart() { try { while (!this.isStopping) { - // TODO: Add a timeout check - console.log('Dequeueing jobs') - const jobs = await this.cyclotronWorker!.dequeueJobsWithVmState(this.queue, this.limit) + const jobs = await runInstrumentedFunction({ + statsKey: `cdpConsumer.cyclotronWorker.dequeueJobsWithVmState`, + func: async () => { + status.info('!', `Dequeing jobs: ${this.queue}`) + return await this.cyclotronWorker!.dequeueJobsWithVmState(this.queue, this.limit) + }, + timeout: 3000, + }) + + status.info('!', `Dequeued jobs ${jobs.length}: ${this.queue}`) + // TODO: How do we "hold" these dequeued jobs? const invocations: HogFunctionInvocation[] = [] @@ -765,7 +773,8 @@ export class CdpCyclotronWorker extends CdpConsumerBase { await delay(100) return } - console.log('Dequeued jobs', this.queue, jobs) + + console.log('jobs', jobs) for (const job of jobs) { // NOTE: This is all a bit messy and might be better to refactor into a helper @@ -780,6 +789,8 @@ export class CdpCyclotronWorker extends CdpConsumerBase { status.error('Error finding hog function', { id: job.functionId, }) + this.cyclotronWorker?.updateJob(job.id, 'failed') + await this.cyclotronWorker?.flushJob(job.id) continue } From 831b362810a670e2826955f84f024c9577b4908f Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 14:30:52 +0200 Subject: [PATCH 24/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 24 ++----- plugin-server/src/cdp/hog-function-manager.ts | 2 - plugin-server/tests/cdp/cdp-e2e.test.ts | 62 ++++++++----------- rust/cyclotron-node/src/manager.ts | 2 - rust/cyclotron-node/src/worker.ts | 5 +- 5 files changed, 35 insertions(+), 60 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 7aa9dd15d1a2e..81dee5a968051 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -724,8 +724,6 @@ export class CdpCyclotronWorker extends CdpConsumerBase { }, }) - console.log('invocationResults', invocationResults) - await this.processInvocationResults(invocationResults) await this.updateJobs(invocationResults) await this.produceQueuedMessages() @@ -738,11 +736,14 @@ export class CdpCyclotronWorker extends CdpConsumerBase { if (item.finished) { console.log('Updating job to completed', id) this.cyclotronWorker?.updateJob(id, 'completed') + } else if (item.error) { + console.log('Updating job to failed', id) + this.cyclotronWorker?.updateJob(id, 'failed') } else { console.log('Updating job to available', id) this.cyclotronWorker?.updateJob(id, 'available', { priority: item.invocation.priority, - vmState: item.invocation, + vmState: serializeHogFunctionInvocation(item.invocation), queueName: item.invocation.queue, parameters: item.invocation.queueParameters ?? null, }) @@ -755,27 +756,14 @@ export class CdpCyclotronWorker extends CdpConsumerBase { private async innerStart() { try { while (!this.isStopping) { - const jobs = await runInstrumentedFunction({ - statsKey: `cdpConsumer.cyclotronWorker.dequeueJobsWithVmState`, - func: async () => { - status.info('!', `Dequeing jobs: ${this.queue}`) - return await this.cyclotronWorker!.dequeueJobsWithVmState(this.queue, this.limit) - }, - timeout: 3000, - }) - - status.info('!', `Dequeued jobs ${jobs.length}: ${this.queue}`) - - // TODO: How do we "hold" these dequeued jobs? + const jobs = await this.cyclotronWorker!.dequeueJobsWithVmState(this.queue, this.limit) const invocations: HogFunctionInvocation[] = [] if (!jobs.length) { await delay(100) - return + continue } - console.log('jobs', jobs) - for (const job of jobs) { // NOTE: This is all a bit messy and might be better to refactor into a helper if (!job.functionId) { diff --git a/plugin-server/src/cdp/hog-function-manager.ts b/plugin-server/src/cdp/hog-function-manager.ts index 389befec7bbe9..94803e209f25e 100644 --- a/plugin-server/src/cdp/hog-function-manager.ts +++ b/plugin-server/src/cdp/hog-function-manager.ts @@ -95,7 +95,6 @@ export class HogFunctionManager { if (!this.ready) { throw new Error('HogFunctionManager is not ready! Run HogFunctionManager.start() before this') } - console.log(this.cache.functions) return this.cache.functions[id] } @@ -144,7 +143,6 @@ export class HogFunctionManager { this.cache = cache status.info('🍿', 'Fetched all hog functions from DB anew') - console.log('Fetched all hog functions from DB anew') } public async reloadHogFunctions(teamId: Team['id'], ids: HogFunctionType['id'][]): Promise { diff --git a/plugin-server/tests/cdp/cdp-e2e.test.ts b/plugin-server/tests/cdp/cdp-e2e.test.ts index 52d545d1f0449..3c1cfdc6cf606 100644 --- a/plugin-server/tests/cdp/cdp-e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-e2e.test.ts @@ -29,10 +29,7 @@ jest.mock('../../src/utils/fetch', () => { const mockFetch: jest.Mock = require('../../src/utils/fetch').trackedFetch describe('CDP E2E', () => { - describe.each([ - // 'kafka', - 'cyclotron', - ])('e2e fetch call: %s', (mode) => { + describe.each(['kafka', 'cyclotron'])('e2e fetch call: %s', (mode) => { let processedEventsConsumer: CdpProcessedEventsConsumer let functionProcessor: CdpFunctionCallbackConsumer let cyclotronWorker: CdpCyclotronWorker | undefined @@ -46,11 +43,6 @@ describe('CDP E2E', () => { const insertHogFunction = async (hogFunction: Partial) => { const item = await _insertHogFunction(hub.postgres, team.id, hogFunction) - // Trigger the reload that django would do - // await processedEventsConsumer.hogFunctionManager.reloadAllHogFunctions() - // await functionProcessor.hogFunctionManager.reloadAllHogFunctions() - // await cyclotronWorker?.hogFunctionManager.reloadAllHogFunctions() - // await cyclotronFetchWorker?.hogFunctionManager.reloadAllHogFunctions() return item } @@ -65,8 +57,6 @@ describe('CDP E2E', () => { ...HOG_FILTERS_EXAMPLES.no_filters, }) - console.log(fnFetchNoFilters.id) - if (mode === 'cyclotron') { hub.CDP_CYCLOTRON_ENABLED_TEAMS = '*' hub.CYCLOTRON_DATABASE_URL = 'postgres://posthog:posthog@localhost:5432/test_cyclotron' @@ -105,18 +95,14 @@ describe('CDP E2E', () => { }) afterEach(async () => { - try { - await Promise.all([ - processedEventsConsumer?.stop(), - functionProcessor?.stop(), - kafkaObserver?.stop(), - cyclotronWorker?.stop(), - cyclotronFetchWorker?.stop(), - ]) - await closeHub() - } catch (e) { - console.error('Error in afterEach:', e) - } + await Promise.all([ + processedEventsConsumer?.stop(), + functionProcessor?.stop(), + kafkaObserver?.stop(), + cyclotronWorker?.stop(), + cyclotronFetchWorker?.stop(), + ]) + await closeHub() }) afterAll(() => { @@ -152,7 +138,24 @@ describe('CDP E2E', () => { ] `) - expect(kafkaObserver.messages).toMatchObject([ + const logMessages = kafkaObserver.messages.filter((m) => m.topic === KAFKA_LOG_ENTRIES) + const metricsMessages = kafkaObserver.messages.filter((m) => m.topic === KAFKA_APP_METRICS_2) + + expect(metricsMessages).toMatchObject([ + { + topic: 'clickhouse_app_metrics2_test', + value: { + app_source: 'hog_function', + app_source_id: fnFetchNoFilters.id.toString(), + count: 1, + metric_kind: 'success', + metric_name: 'succeeded', + team_id: 2, + }, + }, + ]) + + expect(logMessages).toMatchObject([ { topic: 'log_entries_test', value: { @@ -175,17 +178,6 @@ describe('CDP E2E', () => { team_id: 2, }, }, - { - topic: 'clickhouse_app_metrics2_test', - value: { - app_source: 'hog_function', - app_source_id: fnFetchNoFilters.id.toString(), - count: 1, - metric_kind: 'success', - metric_name: 'succeeded', - team_id: 2, - }, - }, { topic: 'log_entries_test', value: { diff --git a/rust/cyclotron-node/src/manager.ts b/rust/cyclotron-node/src/manager.ts index 0cffdd750cf01..f193f63ce5062 100644 --- a/rust/cyclotron-node/src/manager.ts +++ b/rust/cyclotron-node/src/manager.ts @@ -33,8 +33,6 @@ export class CyclotronManager { metadata: job.metadata ? serializeObject('metadata', job.metadata) : null, } - console.log('Creating job:', jobInitInternal) - const json = JSON.stringify(jobInitInternal) return await cyclotron.createJob(json, job.blob ? job.blob.buffer : undefined) } diff --git a/rust/cyclotron-node/src/worker.ts b/rust/cyclotron-node/src/worker.ts index 8827b7b615798..ad315a844be8c 100644 --- a/rust/cyclotron-node/src/worker.ts +++ b/rust/cyclotron-node/src/worker.ts @@ -45,11 +45,10 @@ export class CyclotronWorker { cyclotron.setParameters(id, serializeObject('parameters', updates.parameters)) } if (updates?.metadata) { - cyclotron.setMetadata(id, updates.metadata) + cyclotron.setMetadata(id, serializeObject('metadata', updates.metadata)) } - if (updates?.vmState) { - cyclotron.setMetadata(id, updates.metadata) + cyclotron.setVmState(id, serializeObject('vmState', updates.vmState)) } } From a97f7a065e6ccd88927882aa7af3895fb0bab108 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 15:11:03 +0200 Subject: [PATCH 25/75] Updated worker format --- plugin-server/src/cdp/cdp-consumers.ts | 113 ++++++++++--------------- rust/cyclotron-node/src/worker.ts | 110 +++++++++++++++--------- 2 files changed, 115 insertions(+), 108 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 81dee5a968051..fbb91fa9cf58a 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -1,4 +1,4 @@ -import { CyclotronManager, CyclotronWorker } from '@posthog/cyclotron' +import { CyclotronManager, CyclotronWorker, Job } from '@posthog/cyclotron' import { captureException } from '@sentry/node' import { Message } from 'node-rdkafka' import { Counter, Histogram } from 'prom-client' @@ -20,7 +20,7 @@ import { createKafkaProducerWrapper } from '../utils/db/hub' import { KafkaProducerWrapper } from '../utils/db/kafka-producer-wrapper' import { captureTeamEvent } from '../utils/posthog' import { status } from '../utils/status' -import { castTimestampOrNow, delay } from '../utils/utils' +import { castTimestampOrNow } from '../utils/utils' import { RustyHook } from '../worker/rusty-hook' import { FetchExecutor } from './fetch-executor' import { GroupsManager } from './groups-manager' @@ -645,7 +645,6 @@ export class CdpFunctionCallbackConsumer extends CdpConsumerBase { invocationSerialized.hogFunctionId ) if (!hogFunction) { - console.log('HERE!!!!', invocationSerialized) status.error('Error finding hog function', { id: invocationSerialized.hogFunctionId, }) @@ -698,7 +697,6 @@ export class CdpCyclotronWorker extends CdpConsumerBase { protected name = 'CdpCyclotronWorker' private cyclotronWorker?: CyclotronWorker private runningWorker: Promise | undefined - private isUnhealthy = false protected queue: 'hog' | 'fetch' = 'hog' protected limit = 100 @@ -712,7 +710,6 @@ export class CdpCyclotronWorker extends CdpConsumerBase { func: async () => { // NOTE: In the future this service will never do fetching (unless we decide we want to do it in node at some point) // This is just "for now" to support the transition to cyclotron - console.log('processing invocations', invocations) const fetchQueue = invocations.filter((item) => item.queue === 'fetch') const fetchResults = await this.runManyWithHeartbeat(fetchQueue, (item) => this.fetchExecutor.execute(item) @@ -734,13 +731,13 @@ export class CdpCyclotronWorker extends CdpConsumerBase { invocations.map(async (item) => { const id = item.invocation.id if (item.finished) { - console.log('Updating job to completed', id) + status.debug('⚡️', 'Updating job to completed', id) this.cyclotronWorker?.updateJob(id, 'completed') } else if (item.error) { - console.log('Updating job to failed', id) + status.debug('⚡️', 'Updating job to failed', id) this.cyclotronWorker?.updateJob(id, 'failed') } else { - console.log('Updating job to available', id) + status.debug('⚡️', 'Updating job to available', id) this.cyclotronWorker?.updateJob(id, 'available', { priority: item.invocation.priority, vmState: serializeHogFunctionInvocation(item.invocation), @@ -753,86 +750,70 @@ export class CdpCyclotronWorker extends CdpConsumerBase { ) } - private async innerStart() { - try { - while (!this.isStopping) { - const jobs = await this.cyclotronWorker!.dequeueJobsWithVmState(this.queue, this.limit) - const invocations: HogFunctionInvocation[] = [] + private async handleJobBatch(jobs: Job[]) { + console.log('RECEIVED JOBS', jobs) + const invocations: HogFunctionInvocation[] = [] - if (!jobs.length) { - await delay(100) - continue - } - - for (const job of jobs) { - // NOTE: This is all a bit messy and might be better to refactor into a helper - if (!job.functionId) { - throw new Error('Bad job: ' + JSON.stringify(job)) - } - const hogFunction = this.hogFunctionManager.getHogFunction(job.functionId) - - if (!hogFunction) { - // Here we need to mark the job as failed - - status.error('Error finding hog function', { - id: job.functionId, - }) - this.cyclotronWorker?.updateJob(job.id, 'failed') - await this.cyclotronWorker?.flushJob(job.id) - continue - } + for (const job of jobs) { + // NOTE: This is all a bit messy and might be better to refactor into a helper + if (!job.functionId) { + throw new Error('Bad job: ' + JSON.stringify(job)) + } + const hogFunction = this.hogFunctionManager.getHogFunction(job.functionId) - const parsedState = job.vmState as HogFunctionInvocationSerialized - - // TODO: Should ID come from the job or the state? - invocations.push({ - id: job.id, - globals: parsedState.globals, - teamId: hogFunction.team_id, - hogFunction, - priority: job.priority, - queue: (job.queueName as any) ?? 'hog', - queueParameters: job.parameters as HogFunctionInvocationQueueParameters | undefined, - vmState: parsedState.vmState, - timings: parsedState.timings, - }) - } + if (!hogFunction) { + // Here we need to mark the job as failed - await this.processBatch(invocations) + status.error('Error finding hog function', { + id: job.functionId, + }) + this.cyclotronWorker?.updateJob(job.id, 'failed') + await this.cyclotronWorker?.flushJob(job.id) + continue } - } catch (err) { - this.isUnhealthy = true - console.error('Error in Cyclotron worker', err) - throw err + + const parsedState = job.vmState as HogFunctionInvocationSerialized + + // TODO: Should ID come from the job or the state? + invocations.push({ + id: job.id, + globals: parsedState.globals, + teamId: hogFunction.team_id, + hogFunction, + priority: job.priority, + queue: (job.queueName as any) ?? 'hog', + queueParameters: job.parameters as HogFunctionInvocationQueueParameters | undefined, + vmState: parsedState.vmState, + timings: parsedState.timings, + }) } - console.log('Cyclotron worker stopped') + await this.processBatch(invocations) } public async start() { await super.start() - this.cyclotronWorker = new CyclotronWorker({ dbUrl: this.hub.CYCLOTRON_DATABASE_URL }) - await this.cyclotronWorker.connect() - - // Consumer `start` expects an async task is started, and not that `start` itself blocks - // indefinitely. - this.runningWorker = this.innerStart() - - return Promise.resolve() + this.cyclotronWorker = new CyclotronWorker({ + pool: { dbUrl: this.hub.CYCLOTRON_DATABASE_URL }, + queueName: this.queue, + includeVmState: true, + }) + await this.cyclotronWorker.connect((jobs) => this.handleJobBatch(jobs)) } public async stop() { await super.stop() - // this.cyclotronWorker.disconnect() + await this.cyclotronWorker?.disconnect() await this.runningWorker } public isHealthy() { - return this.isUnhealthy + return this.cyclotronWorker?.isHealthy() ?? false } } +// Mostly used for testing export class CdpCyclotronWorkerFetch extends CdpCyclotronWorker { protected name = 'CdpCyclotronWorkerFetch' protected queue = 'fetch' as const diff --git a/rust/cyclotron-node/src/worker.ts b/rust/cyclotron-node/src/worker.ts index ad315a844be8c..3ce4fbc301c72 100644 --- a/rust/cyclotron-node/src/worker.ts +++ b/rust/cyclotron-node/src/worker.ts @@ -12,21 +12,82 @@ const parseJob = (job: Job): Job => { } } +export type CyclotronWorkerConfig = { + pool: PoolConfig + /** The queue to be consumed from */ + queueName: string + /** Max number of jobs to consume in a batch. Default: 100 */ + batchMaxSize?: number + /** Whether the vmState will be included or not */ + includeVmState?: boolean + /** Amount of delay between dequeue polls. Default: 50ms */ + pollDelayMs?: number + /** Heartbeat timeout. After this time without response from the worker loop the worker will be considered unhealthy. Default 30000 */ + heartbeatTimeoutMs?: number +} + export class CyclotronWorker { - constructor(private config: PoolConfig) { + isConsuming: boolean = false + lastHeartbeat: Date = new Date() + + private consumerLoopPromise: Promise | null = null + + constructor(private config: CyclotronWorkerConfig) { this.config = config } - async connect(): Promise { - return await cyclotron.maybeInitWorker(JSON.stringify(convertToInternalPoolConfig(this.config))) + public isHealthy(): boolean { + return ( + this.isConsuming && + new Date().getTime() - this.lastHeartbeat.getTime() < (this.config.heartbeatTimeoutMs ?? 30000) + ) } - async dequeueJobs(queueName: string, limit: number): Promise { - return (await cyclotron.dequeueJobs(queueName, limit)).map(parseJob) + async connect(processBatch: (jobs: Job[]) => Promise): Promise { + if (this.isConsuming) { + throw new Error('Already consuming') + } + + await cyclotron.maybeInitWorker(JSON.stringify(convertToInternalPoolConfig(this.config.pool))) + + this.consumerLoopPromise = this.startConsumerLoop(processBatch) } - async dequeueJobsWithVmState(queueName: string, limit: number): Promise { - return (await cyclotron.dequeueJobsWithVmState(queueName, limit)).map(parseJob) + private async startConsumerLoop(processBatch: (jobs: Job[]) => Promise): Promise { + try { + this.isConsuming = true + + const batchMaxSize = this.config.batchMaxSize ?? 100 + const pollDelayMs = this.config.pollDelayMs ?? 50 + + while (this.isConsuming) { + this.lastHeartbeat = new Date() + + const jobs = ( + this.config.includeVmState + ? await cyclotron.dequeueJobsWithVmState(this.config.queueName, batchMaxSize) + : await cyclotron.dequeueJobs(this.config.queueName, batchMaxSize) + ).map(parseJob) + + if (!jobs.length) { + // Wait a bit before polling again + await new Promise((resolve) => setTimeout(resolve, pollDelayMs)) + continue + } + + await processBatch(jobs) + } + } catch (e) { + // We only log here so as not to crash the parent process + console.error('Error in worker loop', e) + } finally { + this.isConsuming = false + } + } + + async disconnect(): Promise { + this.isConsuming = false + await (this.consumerLoopPromise ?? Promise.resolve()) } async flushJob(jobId: string): Promise { @@ -51,39 +112,4 @@ export class CyclotronWorker { cyclotron.setVmState(id, serializeObject('vmState', updates.vmState)) } } - - // setState(jobId: string, jobState: JobState): void { - // return cyclotron.setState(jobId, jobState) - // } - - // setQueue(jobId: string, queueName: string): void { - // return cyclotron.setQueue(jobId, queueName) - // } - - // setPriority(jobId: string, priority: number): void { - // return cyclotron.setPriority(jobId, priority) - // } - - // setScheduledAt(jobId: string, scheduledAt: Date): void { - // return cyclotron.setScheduledAt(jobId, scheduledAt.toISOString()) - // } - - // setVmState(jobId: string, vmState: Record | null): void { - // const serialized = serializeObject('vmState', vmState) - // return cyclotron.setVmState(jobId, serialized) - // } - - // setMetadata(jobId: string, metadata: Record | null): void { - // const serialized = serializeObject('metadata', metadata) - // return cyclotron.setMetadata(jobId, serialized) - // } - - // setParameters(jobId: string, parameters: Record | null): void { - // const serialized = serializeObject('parameters', parameters) - // return cyclotron.setParameters(jobId, serialized) - // } - - // setBlob(jobId: string, blob: Uint8Array | null): void { - // return cyclotron.setBlob(jobId, blob) - // } } From 9b812100253d875777fec635b14a42ed281f0e3d Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 15:15:35 +0200 Subject: [PATCH 26/75] Fixes --- rust/cyclotron-node/src/worker.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rust/cyclotron-node/src/worker.ts b/rust/cyclotron-node/src/worker.ts index 3ce4fbc301c72..9f8802ec39009 100644 --- a/rust/cyclotron-node/src/worker.ts +++ b/rust/cyclotron-node/src/worker.ts @@ -50,7 +50,11 @@ export class CyclotronWorker { await cyclotron.maybeInitWorker(JSON.stringify(convertToInternalPoolConfig(this.config.pool))) - this.consumerLoopPromise = this.startConsumerLoop(processBatch) + this.isConsuming = true + this.consumerLoopPromise = this.startConsumerLoop(processBatch).finally(() => { + this.isConsuming = false + this.consumerLoopPromise = null + }) } private async startConsumerLoop(processBatch: (jobs: Job[]) => Promise): Promise { @@ -79,9 +83,7 @@ export class CyclotronWorker { } } catch (e) { // We only log here so as not to crash the parent process - console.error('Error in worker loop', e) - } finally { - this.isConsuming = false + console.error('[Cyclotron] Error in worker loop', e) } } From 2236ec0160b38ed475baf3c44b8258f8bd0e5633 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 15:17:41 +0200 Subject: [PATCH 27/75] fix --- plugin-server/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin-server/package.json b/plugin-server/package.json index 4aeed2450c4c0..9ee04323b087c 100644 --- a/plugin-server/package.json +++ b/plugin-server/package.json @@ -23,7 +23,7 @@ "prettier:check": "prettier --check .", "prepublishOnly": "pnpm build", "setup:dev:clickhouse": "cd .. && DEBUG=1 python manage.py migrate_clickhouse", - "setup:test": "cd .. && TEST=1 python manage.py setup_test_environment && pnpm setup:test:cyclotron", + "setup:test": "cd .. && TEST=1 python manage.py setup_test_environment && cd plugin-server && pnpm run setup:test:cyclotron", "setup:test:cyclotron": "cd ../rust && ./bin/migrate-cyclotron-test", "services:start": "cd .. && docker compose -f docker-compose.dev.yml up", "services:stop": "cd .. && docker compose -f docker-compose.dev.yml down", From 5fa203cfc90867846183c5c79510958d0c3e3c18 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 15:20:38 +0200 Subject: [PATCH 28/75] Re-enable worker --- plugin-server/src/config/config.ts | 6 +++++- plugin-server/src/main/pluginsServer.ts | 19 ++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index d5f391d9e1292..9b5ca929b2e78 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -193,7 +193,11 @@ export function getDefaultConfig(): PluginsServerConfig { CDP_REDIS_PORT: 6479, // Cyclotron - CYCLOTRON_DATABASE_URL: '', + CYCLOTRON_DATABASE_URL: isTestEnv() + ? 'postgres://posthog:posthog@localhost:5432/test_cyclotron' + : isDevEnv() + ? 'postgres://posthog:posthog@localhost:5432/cyclotron' + : '', } } diff --git a/plugin-server/src/main/pluginsServer.ts b/plugin-server/src/main/pluginsServer.ts index 97f18c4ea1def..7d35a7241a533 100644 --- a/plugin-server/src/main/pluginsServer.ts +++ b/plugin-server/src/main/pluginsServer.ts @@ -11,7 +11,7 @@ import v8Profiler from 'v8-profiler-next' import { getPluginServerCapabilities } from '../capabilities' import { CdpApi } from '../cdp/cdp-api' -import { CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../cdp/cdp-consumers' +import { CdpCyclotronWorker, CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../cdp/cdp-consumers' import { defaultConfig, sessionRecordingConsumerConfig } from '../config/config' import { Hub, PluginServerCapabilities, PluginsServerConfig } from '../types' import { createHub, createKafkaClient, createKafkaProducerWrapper } from '../utils/db/hub' @@ -520,16 +520,13 @@ export async function startPluginsServer( } } - // if (capabilities.cdpCyclotronWorker) { - // ;[hub, closeHub] = hub ? [hub, closeHub] : await createHub(serverConfig, capabilities) - // if (hub.CYCLOTRON_DATABASE_URL) { - // const worker = new CdpCyclotronWorker(hub) - // await worker.start() - // } else { - // // This is a temporary solution until we *require* Cyclotron to be configured. - // status.warn('💥', 'CYCLOTRON_DATABASE_URL is not set, not running Cyclotron worker') - // } - // } + if (capabilities.cdpCyclotronWorker) { + ;[hub, closeHub] = hub ? [hub, closeHub] : await createHub(serverConfig, capabilities) + const worker = new CdpCyclotronWorker(hub) + await worker.start() + shutdownCallbacks.push(async () => await worker.stop()) + healthChecks['cdp-cyclotron-worker'] = () => worker.isHealthy() ?? false + } if (capabilities.http) { const app = setupCommonRoutes(healthChecks, serverInstance?.queue ?? undefined) From b3b07b72c9338e0e7e614353d9cac6a7bd1515b6 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 15:27:16 +0200 Subject: [PATCH 29/75] Fixes --- plugin-server/package.json | 2 +- rust/bin/migrate-cyclotron | 9 +++++++++ rust/bin/migrate-cyclotron-test | 7 ------- 3 files changed, 10 insertions(+), 8 deletions(-) create mode 100755 rust/bin/migrate-cyclotron delete mode 100755 rust/bin/migrate-cyclotron-test diff --git a/plugin-server/package.json b/plugin-server/package.json index 9ee04323b087c..0c2d38ebc8f71 100644 --- a/plugin-server/package.json +++ b/plugin-server/package.json @@ -24,7 +24,7 @@ "prepublishOnly": "pnpm build", "setup:dev:clickhouse": "cd .. && DEBUG=1 python manage.py migrate_clickhouse", "setup:test": "cd .. && TEST=1 python manage.py setup_test_environment && cd plugin-server && pnpm run setup:test:cyclotron", - "setup:test:cyclotron": "cd ../rust && ./bin/migrate-cyclotron-test", + "setup:test:cyclotron": "cd ../rust && DATABASE_NAME=test_cyclotron ./bin/migrate-cyclotron", "services:start": "cd .. && docker compose -f docker-compose.dev.yml up", "services:stop": "cd .. && docker compose -f docker-compose.dev.yml down", "services:clean": "cd .. && docker compose -f docker-compose.dev.yml rm -v", diff --git a/rust/bin/migrate-cyclotron b/rust/bin/migrate-cyclotron new file mode 100755 index 0000000000000..5f523c36b5cd1 --- /dev/null +++ b/rust/bin/migrate-cyclotron @@ -0,0 +1,9 @@ +#!/bin/sh +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +export DATABASE_NAME=${DATABASE_NAME:-cyclotron} +export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} + +echo $SCRIPT_DIR/cyclotron-core/migrations +sqlx database create -D "$DATABASE_URL" +sqlx migrate run -D "$DATABASE_URL" --source $SCRIPT_DIR/../cyclotron-core/migrations diff --git a/rust/bin/migrate-cyclotron-test b/rust/bin/migrate-cyclotron-test deleted file mode 100755 index 04a139c8d9ba9..0000000000000 --- a/rust/bin/migrate-cyclotron-test +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -export DATABASE_NAME=${DATABASE_NAME:-test_cyclotron} -export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} - -sqlx database create -D "$DATABASE_URL" -sqlx migrate run -D "$DATABASE_URL" --source ./cyclotron-core/migrations From 9b6602a06d5d184d524e899c75abdf6275984093 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 15:31:12 +0200 Subject: [PATCH 30/75] Fixes --- bin/migrate | 3 +++ rust/bin/migrate-cyclotron | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/migrate b/bin/migrate index 1c32b3b5b0614..fc23c103a1566 100755 --- a/bin/migrate +++ b/bin/migrate @@ -1,5 +1,8 @@ #!/bin/bash set -e +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) + +bash $SCRIPT_DIR/../rust/bin/migrate-cyclotron python manage.py migrate python manage.py migrate_clickhouse diff --git a/rust/bin/migrate-cyclotron b/rust/bin/migrate-cyclotron index 5f523c36b5cd1..f64c0c9cc7ac7 100755 --- a/rust/bin/migrate-cyclotron +++ b/rust/bin/migrate-cyclotron @@ -4,6 +4,5 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) export DATABASE_NAME=${DATABASE_NAME:-cyclotron} export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} -echo $SCRIPT_DIR/cyclotron-core/migrations sqlx database create -D "$DATABASE_URL" sqlx migrate run -D "$DATABASE_URL" --source $SCRIPT_DIR/../cyclotron-core/migrations From f3966451f9153ee78ba82ce28540199fdb8074e1 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 15:32:22 +0200 Subject: [PATCH 31/75] Fixes --- plugin-server/src/main/pluginsServer.ts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/plugin-server/src/main/pluginsServer.ts b/plugin-server/src/main/pluginsServer.ts index 7d35a7241a533..3fbce989bde33 100644 --- a/plugin-server/src/main/pluginsServer.ts +++ b/plugin-server/src/main/pluginsServer.ts @@ -11,7 +11,12 @@ import v8Profiler from 'v8-profiler-next' import { getPluginServerCapabilities } from '../capabilities' import { CdpApi } from '../cdp/cdp-api' -import { CdpCyclotronWorker, CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../cdp/cdp-consumers' +import { + CdpCyclotronWorker, + CdpCyclotronWorkerFetch, + CdpFunctionCallbackConsumer, + CdpProcessedEventsConsumer, +} from '../cdp/cdp-consumers' import { defaultConfig, sessionRecordingConsumerConfig } from '../config/config' import { Hub, PluginServerCapabilities, PluginsServerConfig } from '../types' import { createHub, createKafkaClient, createKafkaProducerWrapper } from '../utils/db/hub' @@ -524,6 +529,12 @@ export async function startPluginsServer( ;[hub, closeHub] = hub ? [hub, closeHub] : await createHub(serverConfig, capabilities) const worker = new CdpCyclotronWorker(hub) await worker.start() + + if (process.env.EXPERIMENTAL_CDP_FETCH_WORKER) { + const fetchWorker = new CdpCyclotronWorkerFetch(hub) + await fetchWorker.start() + } + shutdownCallbacks.push(async () => await worker.stop()) healthChecks['cdp-cyclotron-worker'] = () => worker.isHealthy() ?? false } From b638063ebfc8a60487bd05a134004ebe8d3b0cb4 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 15:40:54 +0200 Subject: [PATCH 32/75] Setup rust on ci --- .github/workflows/ci-plugin-server.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml index a24eaf53d4e69..2fa3438793298 100644 --- a/.github/workflows/ci-plugin-server.yml +++ b/.github/workflows/ci-plugin-server.yml @@ -119,6 +119,21 @@ jobs: cache-dependency-path: '**/requirements*.txt' token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} + - name: Install rust + uses: dtolnay/rust-toolchain@1.77 + + - uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + rust/target + key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.lock') }} + + - name: Run cargo build + working-directory: rust + run: cargo build --all --locked --release && find target/release/ -maxdepth 1 -executable -type f | xargs strip + # uv is a fast pip alternative: https://github.com/astral-sh/uv/ - run: pip install uv From 64fdc13a1a9ce2f4b00d7ff1be5f5212a8136559 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 15:43:47 +0200 Subject: [PATCH 33/75] Fixes --- plugin-server/src/main/pluginsServer.ts | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/plugin-server/src/main/pluginsServer.ts b/plugin-server/src/main/pluginsServer.ts index 29fece1a738cd..1ddaa8f214e02 100644 --- a/plugin-server/src/main/pluginsServer.ts +++ b/plugin-server/src/main/pluginsServer.ts @@ -464,17 +464,16 @@ export async function startPluginsServer( } if (capabilities.cdpCyclotronWorker) { - ;[hub, closeHub] = hub ? [hub, closeHub] : await createHub(serverConfig, capabilities) + const hub = await setupHub() const worker = new CdpCyclotronWorker(hub) await worker.start() + services.push(worker.service) if (process.env.EXPERIMENTAL_CDP_FETCH_WORKER) { - const fetchWorker = new CdpCyclotronWorkerFetch(hub) - await fetchWorker.start() + const workerFetch = new CdpCyclotronWorkerFetch(hub) + await workerFetch.start() + services.push(workerFetch.service) } - - shutdownCallbacks.push(async () => await worker.stop()) - healthChecks['cdp-cyclotron-worker'] = () => worker.isHealthy() ?? false } if (capabilities.http) { From d8169b0b5b9498ed71ed4f98b2aac108f6f7bc51 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 15:52:38 +0200 Subject: [PATCH 34/75] More fixes --- .vscode/launch.json | 3 ++- bin/start-cyclotron | 2 +- plugin-server/src/cdp/cdp-consumers.ts | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 389be51af0c57..88f00c46c9502 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -119,7 +119,8 @@ "WORKER_CONCURRENCY": "2", "OBJECT_STORAGE_ENABLED": "True", "HOG_HOOK_URL": "http://localhost:3300/hoghook", - "CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS": "" + "CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS": "", + "CDP_CYCLOTRON_ENABLED_TEAMS": "*" }, "presentation": { "group": "main" diff --git a/bin/start-cyclotron b/bin/start-cyclotron index 074ec4802d0a4..bce25042f2870 100755 --- a/bin/start-cyclotron +++ b/bin/start-cyclotron @@ -12,7 +12,7 @@ export RUST_LOG=${DEBUG:-debug} SQLX_QUERY_LEVEL=${SQLX_QUERY_LEVEL:-warn} export RUST_LOG=$RUST_LOG,sqlx::query=$SQLX_QUERY_LEVEL -export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/posthog} +export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/cyclotron} export ALLOW_INTERNAL_IPS=${ALLOW_INTERNAL_IPS:-true} ./target/debug/cyclotron-fetch & diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 4180304126d8f..13fbb9997546b 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -768,7 +768,6 @@ export class CdpCyclotronWorker extends CdpConsumerBase { } private async handleJobBatch(jobs: Job[]) { - console.log('RECEIVED JOBS', jobs) const invocations: HogFunctionInvocation[] = [] for (const job of jobs) { From b96616a9cfa7518ffccb7126a039a3b32121dd69 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 16:05:10 +0200 Subject: [PATCH 35/75] fixes --- plugin-server/tests/cdp/cdp-consumer.e2e.test.ts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts index 98cb2e041b042..8d6581aef9ef0 100644 --- a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts @@ -1,7 +1,7 @@ import { CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../../src/cdp/cdp-consumers' import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' import { Hub, Team } from '../../src/types' -import { createHub } from '../../src/utils/db/hub' +import { closeHub, createHub } from '../../src/utils/db/hub' import { getFirstTeam, resetTestDatabase } from '../helpers/sql' import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' import { createHogExecutionGlobals, insertHogFunction as _insertHogFunction } from './fixtures' @@ -87,7 +87,6 @@ describe('CDP Consumers E2E', () => { let processedEventsConsumer: CdpProcessedEventsConsumer let functionProcessor: CdpFunctionCallbackConsumer let hub: Hub - let closeHub: () => Promise let team: Team const insertHogFunction = async (hogFunction: Partial) => { @@ -100,7 +99,7 @@ describe('CDP Consumers E2E', () => { beforeEach(async () => { await resetTestDatabase() - ;[hub, closeHub] = await createHub() + hub = await createHub() team = await getFirstTeam(hub) processedEventsConsumer = new CdpProcessedEventsConsumer(hub) @@ -115,7 +114,7 @@ describe('CDP Consumers E2E', () => { jest.setTimeout(10000) await processedEventsConsumer.stop() await functionProcessor.stop() - await closeHub() + await closeHub(hub) }) afterAll(() => { From 488c1726e02fa3f67824fa8c3ca870154694d028 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 16:05:52 +0200 Subject: [PATCH 36/75] Fixes --- plugin-server/tests/cdp/cdp-e2e.test.ts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/plugin-server/tests/cdp/cdp-e2e.test.ts b/plugin-server/tests/cdp/cdp-e2e.test.ts index 3c1cfdc6cf606..8ad3dbcb4e578 100644 --- a/plugin-server/tests/cdp/cdp-e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-e2e.test.ts @@ -7,7 +7,7 @@ import { import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' import { KAFKA_APP_METRICS_2, KAFKA_LOG_ENTRIES } from '../../src/config/kafka-topics' import { Hub, Team } from '../../src/types' -import { createHub } from '../../src/utils/db/hub' +import { closeHub, createHub } from '../../src/utils/db/hub' import { waitForExpect } from '../helpers/expectations' import { getFirstTeam, resetTestDatabase } from '../helpers/sql' import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' @@ -35,7 +35,6 @@ describe('CDP E2E', () => { let cyclotronWorker: CdpCyclotronWorker | undefined let cyclotronFetchWorker: CdpCyclotronWorkerFetch | undefined let hub: Hub - let closeHub: () => Promise let team: Team let kafkaObserver: TestKafkaObserver let fnFetchNoFilters: HogFunctionType @@ -48,7 +47,7 @@ describe('CDP E2E', () => { beforeEach(async () => { await resetTestDatabase() - ;[hub, closeHub] = await createHub() + hub = await createHub() team = await getFirstTeam(hub) fnFetchNoFilters = await insertHogFunction({ @@ -102,7 +101,7 @@ describe('CDP E2E', () => { cyclotronWorker?.stop(), cyclotronFetchWorker?.stop(), ]) - await closeHub() + await closeHub(hub) }) afterAll(() => { From 01ede4ea3cef2260d0659a9984d1d4b03fda8d60 Mon Sep 17 00:00:00 2001 From: Ben White Date: Tue, 3 Sep 2024 16:13:01 +0200 Subject: [PATCH 37/75] Fix types --- plugin-server/src/cdp/cdp-consumers.ts | 7 ++++--- plugin-server/src/config/config.ts | 2 ++ plugin-server/src/types.ts | 2 ++ rust/cyclotron-node/src/helpers.ts | 4 ++-- rust/cyclotron-node/src/manager.ts | 6 +++--- rust/cyclotron-node/src/types.ts | 19 +++++++++++-------- rust/cyclotron-node/src/worker.ts | 12 ++++++------ 7 files changed, 30 insertions(+), 22 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 13fbb9997546b..e256c96298ab0 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -1,4 +1,4 @@ -import { CyclotronManager, CyclotronWorker, Job } from '@posthog/cyclotron' +import { CyclotronJob, CyclotronManager, CyclotronWorker } from '@posthog/cyclotron' import { captureException } from '@sentry/node' import { Message } from 'node-rdkafka' import { Counter, Histogram } from 'prom-client' @@ -715,7 +715,6 @@ export class CdpCyclotronWorker extends CdpConsumerBase { private cyclotronWorker?: CyclotronWorker private runningWorker: Promise | undefined protected queue: 'hog' | 'fetch' = 'hog' - protected limit = 100 public async processBatch(invocations: HogFunctionInvocation[]): Promise { if (!invocations.length) { @@ -767,7 +766,7 @@ export class CdpCyclotronWorker extends CdpConsumerBase { ) } - private async handleJobBatch(jobs: Job[]) { + private async handleJobBatch(jobs: CyclotronJob[]) { const invocations: HogFunctionInvocation[] = [] for (const job of jobs) { @@ -814,6 +813,8 @@ export class CdpCyclotronWorker extends CdpConsumerBase { pool: { dbUrl: this.hub.CYCLOTRON_DATABASE_URL }, queueName: this.queue, includeVmState: true, + batchMaxSize: this.hub.CDP_CYCLOTRON_BATCH_SIZE, + pollDelayMs: this.hub.CDP_CYCLOTRON_BATCH_DELAY_MS, }) await this.cyclotronWorker.connect((jobs) => this.handleJobBatch(jobs)) } diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index a534fd816ec3f..23ed31968bdf9 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -187,6 +187,8 @@ export function getDefaultConfig(): PluginsServerConfig { CDP_REDIS_PASSWORD: '', CDP_REDIS_HOST: '', CDP_REDIS_PORT: 6479, + CDP_CYCLOTRON_BATCH_DELAY_MS: 50, + CDP_CYCLOTRON_BATCH_SIZE: 500, // Cyclotron CYCLOTRON_DATABASE_URL: isTestEnv() diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index a3ff6cb7c433d..fe9065bb9ce8a 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -114,6 +114,8 @@ export type CdpConfig = { CDP_WATCHER_DISABLED_TEMPORARY_MAX_COUNT: number // How many times a function can be disabled before it is disabled permanently CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS: string CDP_CYCLOTRON_ENABLED_TEAMS: string + CDP_CYCLOTRON_BATCH_SIZE: number + CDP_CYCLOTRON_BATCH_DELAY_MS: number CDP_REDIS_HOST: string CDP_REDIS_PORT: number CDP_REDIS_PASSWORD: string diff --git a/rust/cyclotron-node/src/helpers.ts b/rust/cyclotron-node/src/helpers.ts index a26f788da5e5b..ba1ace2a37161 100644 --- a/rust/cyclotron-node/src/helpers.ts +++ b/rust/cyclotron-node/src/helpers.ts @@ -1,6 +1,6 @@ -import { InternalPoolConfig, PoolConfig } from './types' +import { CyclotronInternalPoolConfig, CyclotronPoolConfig } from './types' -export function convertToInternalPoolConfig(poolConfig: PoolConfig): InternalPoolConfig { +export function convertToInternalPoolConfig(poolConfig: CyclotronPoolConfig): CyclotronInternalPoolConfig { return { db_url: poolConfig.dbUrl, max_connections: poolConfig.maxConnections, diff --git a/rust/cyclotron-node/src/manager.ts b/rust/cyclotron-node/src/manager.ts index f193f63ce5062..bba6488828ba2 100644 --- a/rust/cyclotron-node/src/manager.ts +++ b/rust/cyclotron-node/src/manager.ts @@ -2,10 +2,10 @@ const cyclotron = require('../index.node') import { convertToInternalPoolConfig, serializeObject } from './helpers' -import { JobInit, PoolConfig } from './types' +import { CyclotronJobInit, CyclotronPoolConfig } from './types' export class CyclotronManager { - constructor(private config: { shards: PoolConfig[] }) { + constructor(private config: { shards: CyclotronPoolConfig[] }) { this.config = config } @@ -17,7 +17,7 @@ export class CyclotronManager { ) } - async createJob(job: JobInit): Promise { + async createJob(job: CyclotronJobInit): Promise { job.priority ??= 1 job.scheduled ??= new Date() diff --git a/rust/cyclotron-node/src/types.ts b/rust/cyclotron-node/src/types.ts index 4b38657c2ca53..88c8a26099083 100644 --- a/rust/cyclotron-node/src/types.ts +++ b/rust/cyclotron-node/src/types.ts @@ -1,4 +1,4 @@ -export type PoolConfig = { +export type CyclotronPoolConfig = { dbUrl: string maxConnections?: number minConnections?: number @@ -8,7 +8,7 @@ export type PoolConfig = { } // Type as expected by Cyclotron. -export type InternalPoolConfig = { +export type CyclotronInternalPoolConfig = { db_url: string max_connections?: number min_connections?: number @@ -17,9 +17,9 @@ export type InternalPoolConfig = { idle_timeout_seconds?: number } -export type JobState = 'available' | 'running' | 'completed' | 'failed' | 'paused' +export type CyclotronJobState = 'available' | 'running' | 'completed' | 'failed' | 'paused' -export type Job = { +export type CyclotronJob = { id: string teamId: number functionId: string | null @@ -30,7 +30,7 @@ export type Job = { transitionCount: number lastTransition: Date queueName: string - state: JobState + state: CyclotronJobState priority: number scheduled: Date vmState: object | null @@ -39,7 +39,10 @@ export type Job = { blob: Uint8Array | null } -export type JobInit = Pick & - Pick, 'scheduled' | 'vmState' | 'parameters' | 'metadata' | 'blob'> +export type CyclotronJobInit = Pick & + Pick, 'scheduled' | 'vmState' | 'parameters' | 'metadata' | 'blob'> -export type JobUpdate = Pick, 'queueName' | 'priority' | 'vmState' | 'parameters' | 'metadata' | 'blob'> +export type CyclotronJobUpdate = Pick< + Partial, + 'queueName' | 'priority' | 'vmState' | 'parameters' | 'metadata' | 'blob' +> diff --git a/rust/cyclotron-node/src/worker.ts b/rust/cyclotron-node/src/worker.ts index 9f8802ec39009..53cab7de65a89 100644 --- a/rust/cyclotron-node/src/worker.ts +++ b/rust/cyclotron-node/src/worker.ts @@ -1,9 +1,9 @@ // eslint-disable-next-line @typescript-eslint/no-var-requires const cyclotron = require('../index.node') import { convertToInternalPoolConfig, deserializeObject, serializeObject } from './helpers' -import { Job, JobState, JobUpdate, PoolConfig } from './types' +import { CyclotronJob, CyclotronJobState, CyclotronJobUpdate, CyclotronPoolConfig } from './types' -const parseJob = (job: Job): Job => { +const parseJob = (job: CyclotronJob): CyclotronJob => { return { ...job, vmState: deserializeObject('vmState', job.vmState), @@ -13,7 +13,7 @@ const parseJob = (job: Job): Job => { } export type CyclotronWorkerConfig = { - pool: PoolConfig + pool: CyclotronPoolConfig /** The queue to be consumed from */ queueName: string /** Max number of jobs to consume in a batch. Default: 100 */ @@ -43,7 +43,7 @@ export class CyclotronWorker { ) } - async connect(processBatch: (jobs: Job[]) => Promise): Promise { + async connect(processBatch: (jobs: CyclotronJob[]) => Promise): Promise { if (this.isConsuming) { throw new Error('Already consuming') } @@ -57,7 +57,7 @@ export class CyclotronWorker { }) } - private async startConsumerLoop(processBatch: (jobs: Job[]) => Promise): Promise { + private async startConsumerLoop(processBatch: (jobs: CyclotronJob[]) => Promise): Promise { try { this.isConsuming = true @@ -96,7 +96,7 @@ export class CyclotronWorker { return await cyclotron.flushJob(jobId) } - updateJob(id: Job['id'], state: JobState, updates?: JobUpdate): void { + updateJob(id: CyclotronJob['id'], state: CyclotronJobState, updates?: CyclotronJobUpdate): void { cyclotron.setState(id, state) if (updates?.queueName) { cyclotron.setQueue(id, updates.queueName) From 7843af01cb02cce23dd183413f3f40adbcefd127 Mon Sep 17 00:00:00 2001 From: Ben White Date: Wed, 4 Sep 2024 10:12:03 +0200 Subject: [PATCH 38/75] Fixes --- .github/workflows/ci-plugin-server.yml | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml index 2fa3438793298..66176d71d947c 100644 --- a/.github/workflows/ci-plugin-server.yml +++ b/.github/workflows/ci-plugin-server.yml @@ -122,17 +122,9 @@ jobs: - name: Install rust uses: dtolnay/rust-toolchain@1.77 - - uses: actions/cache@v4 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - rust/target - key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.lock') }} - - - name: Run cargo build + - name: Install sqlx-cli working-directory: rust - run: cargo build --all --locked --release && find target/release/ -maxdepth 1 -executable -type f | xargs strip + run: cargo install sqlx-cli --no-default-features --features native-tls,postgres # uv is a fast pip alternative: https://github.com/astral-sh/uv/ - run: pip install uv From ac20828098f2e3871a16f8565edeab523d2eb109 Mon Sep 17 00:00:00 2001 From: Ben White Date: Wed, 4 Sep 2024 10:22:29 +0200 Subject: [PATCH 39/75] Fixes --- .github/workflows/ci-plugin-server.yml | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml index 66176d71d947c..6fb102316800a 100644 --- a/.github/workflows/ci-plugin-server.yml +++ b/.github/workflows/ci-plugin-server.yml @@ -57,7 +57,6 @@ jobs: defaults: run: working-directory: 'plugin-server' - steps: - uses: actions/checkout@v3 @@ -82,6 +81,7 @@ jobs: tests: name: Plugin Server Tests (${{matrix.shard}}) needs: changes + if: needs.changes.outputs.plugin-server == 'true' runs-on: ubuntu-latest strategy: @@ -97,21 +97,17 @@ jobs: steps: - name: Code check out - if: needs.changes.outputs.plugin-server == 'true' uses: actions/checkout@v3 - name: Stop/Start stack with Docker Compose - if: needs.changes.outputs.plugin-server == 'true' run: | docker compose -f docker-compose.dev.yml down docker compose -f docker-compose.dev.yml up -d - name: Add Kafka to /etc/hosts - if: needs.changes.outputs.plugin-server == 'true' run: echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts - name: Set up Python - if: needs.changes.outputs.plugin-server == 'true' uses: actions/setup-python@v5 with: python-version: 3.11.9 @@ -130,23 +126,19 @@ jobs: - run: pip install uv - name: Install SAML (python3-saml) dependencies - if: needs.changes.outputs.plugin-server == 'true' run: | sudo apt-get update sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl - name: Install python dependencies - if: needs.changes.outputs.plugin-server == 'true' run: | uv pip install --system -r requirements-dev.txt uv pip install --system -r requirements.txt - name: Install pnpm - if: needs.changes.outputs.plugin-server == 'true' uses: pnpm/action-setup@v4 - name: Set up Node.js - if: needs.changes.outputs.plugin-server == 'true' uses: actions/setup-node@v4 with: node-version: 18.12.1 @@ -154,17 +146,14 @@ jobs: cache-dependency-path: plugin-server/pnpm-lock.yaml - name: Install package.json dependencies with pnpm - if: needs.changes.outputs.plugin-server == 'true' run: cd plugin-server && pnpm i - name: Wait for Clickhouse, Redis & Kafka - if: needs.changes.outputs.plugin-server == 'true' run: | docker compose -f docker-compose.dev.yml up kafka redis clickhouse -d --wait bin/check_kafka_clickhouse_up - name: Set up databases - if: needs.changes.outputs.plugin-server == 'true' env: TEST: 'true' SECRET_KEY: 'abcdef' # unsafe - for testing only @@ -172,7 +161,6 @@ jobs: run: cd plugin-server && pnpm setup:test - name: Test with Jest - if: needs.changes.outputs.plugin-server == 'true' env: # Below DB name has `test_` prepended, as that's how Django (ran above) creates the test DB DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/test_posthog' From 61e63bbca4e3f4aba216d2fc11a573b93a5d5c55 Mon Sep 17 00:00:00 2001 From: Ben White Date: Wed, 4 Sep 2024 10:23:56 +0200 Subject: [PATCH 40/75] fix --- .github/workflows/ci-plugin-server.yml | 6 +++--- rust/bin/migrate-cyclotron | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml index 6fb102316800a..0104b65338863 100644 --- a/.github/workflows/ci-plugin-server.yml +++ b/.github/workflows/ci-plugin-server.yml @@ -115,6 +115,9 @@ jobs: cache-dependency-path: '**/requirements*.txt' token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} + # uv is a fast pip alternative: https://github.com/astral-sh/uv/ + - run: pip install uv + - name: Install rust uses: dtolnay/rust-toolchain@1.77 @@ -122,9 +125,6 @@ jobs: working-directory: rust run: cargo install sqlx-cli --no-default-features --features native-tls,postgres - # uv is a fast pip alternative: https://github.com/astral-sh/uv/ - - run: pip install uv - - name: Install SAML (python3-saml) dependencies run: | sudo apt-get update diff --git a/rust/bin/migrate-cyclotron b/rust/bin/migrate-cyclotron index f64c0c9cc7ac7..23d5110243cd2 100755 --- a/rust/bin/migrate-cyclotron +++ b/rust/bin/migrate-cyclotron @@ -4,5 +4,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) export DATABASE_NAME=${DATABASE_NAME:-cyclotron} export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} +echo "Script dir: $SCRIPT_DIR" + sqlx database create -D "$DATABASE_URL" sqlx migrate run -D "$DATABASE_URL" --source $SCRIPT_DIR/../cyclotron-core/migrations From 022b7d6b935cb588f175e8066860419222f9589f Mon Sep 17 00:00:00 2001 From: Ben White Date: Wed, 4 Sep 2024 10:51:12 +0200 Subject: [PATCH 41/75] Fixes --- bin/migrate | 2 +- plugin-server/package.json | 2 +- rust/bin/migrate-cyclotron | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/migrate b/bin/migrate index fc23c103a1566..bc7608fa7b4c8 100755 --- a/bin/migrate +++ b/bin/migrate @@ -1,6 +1,6 @@ #!/bin/bash set -e -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") bash $SCRIPT_DIR/../rust/bin/migrate-cyclotron diff --git a/plugin-server/package.json b/plugin-server/package.json index 4037dfab6b0e7..0d79dbbaf6a22 100644 --- a/plugin-server/package.json +++ b/plugin-server/package.json @@ -24,7 +24,7 @@ "prepublishOnly": "pnpm build", "setup:dev:clickhouse": "cd .. && DEBUG=1 python manage.py migrate_clickhouse", "setup:test": "cd .. && TEST=1 python manage.py setup_test_environment && cd plugin-server && pnpm run setup:test:cyclotron", - "setup:test:cyclotron": "cd ../rust && DATABASE_NAME=test_cyclotron ./bin/migrate-cyclotron", + "setup:test:cyclotron": "DATABASE_NAME=test_cyclotron ../rust/bin/migrate-cyclotron", "services:start": "cd .. && docker compose -f docker-compose.dev.yml up", "services:stop": "cd .. && docker compose -f docker-compose.dev.yml down", "services:clean": "cd .. && docker compose -f docker-compose.dev.yml rm -v", diff --git a/rust/bin/migrate-cyclotron b/rust/bin/migrate-cyclotron index 23d5110243cd2..902485150750c 100755 --- a/rust/bin/migrate-cyclotron +++ b/rust/bin/migrate-cyclotron @@ -1,5 +1,5 @@ #!/bin/sh -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") export DATABASE_NAME=${DATABASE_NAME:-cyclotron} export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} From 4f5d0da8bb87af67e2a467b13d3d25c34d3e2679 Mon Sep 17 00:00:00 2001 From: Ben White Date: Wed, 4 Sep 2024 13:52:44 +0200 Subject: [PATCH 42/75] Fixes --- plugin-server/src/cdp/hog-executor.ts | 8 +++++--- plugin-server/src/cdp/types.ts | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index 382f6b3fc3549..299a5334d0173 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -164,14 +164,15 @@ export class HogExecutor { invocation.queue = 'hog' invocation.queueParameters = undefined + const status = typeof response?.status === 'number' ? response.status : 503 + // Special handling for fetch - // TODO: Would be good to have a dedicated value in the fetch response for the status code - if (response?.status && response.status >= 400) { + if (status >= 400) { // Generic warn log for bad status codes logs.push({ level: 'warn', timestamp: DateTime.now(), - message: `Fetch returned bad status: ${response.status}`, + message: `Fetch returned bad status: ${status}`, }) } @@ -337,6 +338,7 @@ export class HogExecutor { method, headers, body, + return_queue: 'hog', } break diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index e8e673f79982a..211bf04328301 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -157,7 +157,9 @@ export type HogFunctionQueueParametersFetchRequest = { url: string method: string body: string - headers: Record + return_queue: string + max_tries?: number + headers?: Record } export type HogFunctionQueueParametersFetchResponse = { @@ -165,7 +167,7 @@ export type HogFunctionQueueParametersFetchResponse = { error?: any /** The data to be passed to the Hog function from the response */ response?: { - status: number + status: number | 'failure' body: any } | null timings?: HogFunctionTiming[] From 52611be45911d06c52287032b1574b0d9f491241 Mon Sep 17 00:00:00 2001 From: Ben White Date: Wed, 4 Sep 2024 14:55:48 +0200 Subject: [PATCH 43/75] Fixed up setting and parsing of blobs --- plugin-server/src/cdp/cdp-consumers.ts | 2 ++ plugin-server/src/cdp/fetch-executor.ts | 15 ++++++++++----- plugin-server/src/cdp/hog-executor.ts | 10 ++++++++-- plugin-server/src/cdp/types.ts | 7 ++++--- plugin-server/src/cdp/utils.ts | 3 +++ rust/cyclotron-node/src/worker.ts | 3 +++ 6 files changed, 30 insertions(+), 10 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index f52ac04db8dd8..79f960e3406d8 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -762,6 +762,7 @@ export class CdpCyclotronWorker extends CdpConsumerBase { vmState: serializeHogFunctionInvocation(item.invocation), queueName: item.invocation.queue, parameters: item.invocation.queueParameters ?? null, + blob: item.invocation.queueBlob ?? null, }) } await this.cyclotronWorker?.flushJob(id) @@ -801,6 +802,7 @@ export class CdpCyclotronWorker extends CdpConsumerBase { priority: job.priority, queue: (job.queueName as any) ?? 'hog', queueParameters: job.parameters as HogFunctionInvocationQueueParameters | undefined, + queueBlob: job.blob ?? undefined, vmState: parsedState.vmState, timings: parsedState.timings, }) diff --git a/plugin-server/src/cdp/fetch-executor.ts b/plugin-server/src/cdp/fetch-executor.ts index b2e99ef0a1836..cb9f16bf80c32 100644 --- a/plugin-server/src/cdp/fetch-executor.ts +++ b/plugin-server/src/cdp/fetch-executor.ts @@ -12,7 +12,7 @@ import { HogFunctionQueueParametersFetchRequest, HogFunctionQueueParametersFetchResponse, } from './types' -import { gzipObject } from './utils' +import { gzipObject, serializeHogFunctionInvocation } from './utils' export const BUCKETS_KB_WRITTEN = [0, 128, 512, 1024, 2024, 4096, 10240, Infinity] @@ -44,15 +44,18 @@ export class FetchExecutor { } const params = invocation.queueParameters as HogFunctionQueueParametersFetchRequest - if (params.body) { - histogramFetchPayloadSize.observe(params.body.length / 1024) + const blob = invocation.queueBlob + + const body = blob ? blob.toString() : undefined + if (body) { + histogramFetchPayloadSize.observe(body.length / 1024) } try { if (this.hogHookEnabledForTeams(invocation.teamId)) { // This is very temporary until we are commited to Cyclotron const payload: HogFunctionInvocationAsyncRequest = { - state: await gzipObject(invocation), + state: await gzipObject(serializeHogFunctionInvocation(invocation)), teamId: invocation.teamId, hogFunctionId: invocation.hogFunction.id, asyncFunctionRequest: { @@ -61,6 +64,7 @@ export class FetchExecutor { params.url, { ...params, + body, }, ], }, @@ -88,6 +92,7 @@ export class FetchExecutor { } const params = invocation.queueParameters as HogFunctionQueueParametersFetchRequest + const body = invocation.queueBlob ? invocation.queueBlob.toString() : undefined const resParams: HogFunctionQueueParametersFetchResponse = { response: { @@ -102,7 +107,7 @@ export class FetchExecutor { const start = performance.now() const fetchResponse = await trackedFetch(params.url, { method: params.method, - body: params.body, + body, headers: params.headers, timeout: this.serverConfig.EXTERNAL_REQUEST_TIMEOUT_MS, }) diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index 299a5334d0173..684aaa1d3a953 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -159,10 +159,15 @@ export class HogExecutor { error, timings = [], } = invocation.queueParameters as HogFunctionQueueParametersFetchResponse + if (response) { + // Convert from buffer to string + response.body = invocation.queueBlob ? Buffer.from(invocation.queueBlob).toString() : undefined + } // Reset the queue parameters to be sure invocation.queue = 'hog' invocation.queueParameters = undefined + invocation.queueBlob = undefined const status = typeof response?.status === 'number' ? response.status : 503 @@ -337,10 +342,11 @@ export class HogExecutor { url, method, headers, - body, + // body, return_queue: 'hog', } - + // The payload is always blob encoded + result.invocation.queueBlob = body ? Buffer.from(body) : undefined break default: throw new Error(`Unknown async function '${execRes.asyncFunctionName}'`) diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index cdcc0a4927bcf..eea3841b0ea8f 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -156,7 +156,7 @@ export interface HogFunctionTiming { export type HogFunctionQueueParametersFetchRequest = { url: string method: string - body: string + // body: string return_queue: string max_tries?: number headers?: Record @@ -167,8 +167,8 @@ export type HogFunctionQueueParametersFetchResponse = { error?: any /** The data to be passed to the Hog function from the response */ response?: { - status: number | 'failure' - body: any + status: number + body?: any } | null timings?: HogFunctionTiming[] logs?: LogEntry[] @@ -186,6 +186,7 @@ export type HogFunctionInvocation = { priority: number queue: 'hog' | 'fetch' queueParameters?: HogFunctionInvocationQueueParameters + queueBlob?: Uint8Array // The current vmstate (set if the invocation is paused) vmState?: VMState timings: HogFunctionTiming[] diff --git a/plugin-server/src/cdp/utils.ts b/plugin-server/src/cdp/utils.ts index 22d7dcb57f7b3..b6ad78d732efb 100644 --- a/plugin-server/src/cdp/utils.ts +++ b/plugin-server/src/cdp/utils.ts @@ -231,6 +231,9 @@ export function serializeHogFunctionInvocation(invocation: HogFunctionInvocation const serializedInvocation: HogFunctionInvocationSerialized = { ...invocation, hogFunctionId: invocation.hogFunction.id, + // We clear the params as they are never used in the serialized form + queueParameters: undefined, + queueBlob: undefined, } delete (serializedInvocation as any).hogFunction diff --git a/rust/cyclotron-node/src/worker.ts b/rust/cyclotron-node/src/worker.ts index 53cab7de65a89..7b3411863af7d 100644 --- a/rust/cyclotron-node/src/worker.ts +++ b/rust/cyclotron-node/src/worker.ts @@ -113,5 +113,8 @@ export class CyclotronWorker { if (updates?.vmState) { cyclotron.setVmState(id, serializeObject('vmState', updates.vmState)) } + if (updates?.blob) { + cyclotron.setBlob(id, updates.blob) + } } } From 47a48189cb9058a013ef8c60f81ef6309f9b3d37 Mon Sep 17 00:00:00 2001 From: Ben White Date: Wed, 4 Sep 2024 15:19:55 +0200 Subject: [PATCH 44/75] Fixed up tests --- plugin-server/src/cdp/cdp-api.ts | 3 +- plugin-server/src/cdp/fetch-executor.ts | 11 ++----- plugin-server/src/cdp/types.ts | 2 -- plugin-server/tests/cdp/hog-executor.test.ts | 30 +++++++++----------- 4 files changed, 18 insertions(+), 28 deletions(-) diff --git a/plugin-server/src/cdp/cdp-api.ts b/plugin-server/src/cdp/cdp-api.ts index 34de05942471e..5b9b7f7532007 100644 --- a/plugin-server/src/cdp/cdp-api.ts +++ b/plugin-server/src/cdp/cdp-api.ts @@ -148,7 +148,8 @@ export class CdpApi { invocation: { ...invocation, queue: 'hog', - queueParameters: { response: { status: 200, body: {} } }, + queueParameters: { response: { status: 200 } }, + queueBlob: Buffer.from('{}'), }, finished: false, logs: [ diff --git a/plugin-server/src/cdp/fetch-executor.ts b/plugin-server/src/cdp/fetch-executor.ts index cb9f16bf80c32..6ab71f613f198 100644 --- a/plugin-server/src/cdp/fetch-executor.ts +++ b/plugin-server/src/cdp/fetch-executor.ts @@ -93,11 +93,11 @@ export class FetchExecutor { const params = invocation.queueParameters as HogFunctionQueueParametersFetchRequest const body = invocation.queueBlob ? invocation.queueBlob.toString() : undefined + let responseBody = '' const resParams: HogFunctionQueueParametersFetchResponse = { response: { status: 0, - body: {}, }, error: null, timings: [], @@ -112,12 +112,7 @@ export class FetchExecutor { timeout: this.serverConfig.EXTERNAL_REQUEST_TIMEOUT_MS, }) - let responseBody = await fetchResponse.text() - try { - responseBody = JSON.parse(responseBody) - } catch (err) { - // Ignore - } + responseBody = await fetchResponse.text() const duration = performance.now() - start @@ -128,7 +123,6 @@ export class FetchExecutor { resParams.response = { status: fetchResponse.status, - body: responseBody, } } catch (err) { status.error('🦔', `[HogExecutor] Error during fetch`, { error: String(err) }) @@ -140,6 +134,7 @@ export class FetchExecutor { ...invocation, queue: 'hog', queueParameters: resParams, + queueBlob: Buffer.from(responseBody), }, finished: false, logs: [], diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index eea3841b0ea8f..2ddbc35bf3d5d 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -156,7 +156,6 @@ export interface HogFunctionTiming { export type HogFunctionQueueParametersFetchRequest = { url: string method: string - // body: string return_queue: string max_tries?: number headers?: Record @@ -168,7 +167,6 @@ export type HogFunctionQueueParametersFetchResponse = { /** The data to be passed to the Hog function from the response */ response?: { status: number - body?: any } | null timings?: HogFunctionTiming[] logs?: LogEntry[] diff --git a/plugin-server/tests/cdp/hog-executor.test.ts b/plugin-server/tests/cdp/hog-executor.test.ts index dc6350e0bb3d2..5d22f63bea51a 100644 --- a/plugin-server/tests/cdp/hog-executor.test.ts +++ b/plugin-server/tests/cdp/hog-executor.test.ts @@ -2,7 +2,7 @@ import { DateTime } from 'luxon' import { HogExecutor } from '../../src/cdp/hog-executor' import { HogFunctionManager } from '../../src/cdp/hog-function-manager' -import { HogFunctionAsyncFunctionResponse, HogFunctionType } from '../../src/cdp/types' +import { HogFunctionInvocation, HogFunctionType } from '../../src/cdp/types' import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' import { createHogExecutionGlobals, @@ -11,8 +11,9 @@ import { insertHogFunction as _insertHogFunction, } from './fixtures' -const createAsyncFunctionResponse = (response?: Record): HogFunctionAsyncFunctionResponse => { - return { +const setupFetchResponse = (invocation: HogFunctionInvocation, options?: { status?: number; body?: string }): void => { + invocation.queue = 'hog' + invocation.queueParameters = { timings: [ { kind: 'async_function', @@ -20,11 +21,10 @@ const createAsyncFunctionResponse = (response?: Record): HogFunctio }, ], response: { - status: 200, - body: 'success', - ...response, + status: options?.status ?? 200, }, } + invocation.queueBlob = Buffer.from(options?.body ?? 'success') } describe('Hog Executor', () => { @@ -69,6 +69,7 @@ describe('Hog Executor', () => { hogFunction: invocation.hogFunction, queue: 'fetch', queueParameters: expect.any(Object), + queueBlob: expect.any(Buffer), timings: [ { kind: 'hog', @@ -133,7 +134,8 @@ describe('Hog Executor', () => { }, }) - expect(JSON.parse(result.invocation.queueParameters!.body)).toEqual({ + const body = JSON.parse(result.invocation.queueBlob!.toString()) + expect(body).toEqual({ event: { uuid: 'uuid', name: 'test', @@ -163,8 +165,7 @@ describe('Hog Executor', () => { expect(result.invocation.vmState).toBeDefined() // Simulate what the callback does - result.invocation.queue = 'hog' - result.invocation.queueParameters = createAsyncFunctionResponse() + setupFetchResponse(result.invocation) const secondResult = executor.execute(result.invocation) logs.push(...secondResult.logs) @@ -185,10 +186,7 @@ describe('Hog Executor', () => { it('parses the responses body if a string', () => { const result = executor.execute(createInvocation(hogFunction)) const logs = result.logs.splice(0, 100) - result.invocation.queue = 'hog' - result.invocation.queueParameters = createAsyncFunctionResponse({ - body: JSON.stringify({ foo: 'bar' }), - }) + setupFetchResponse(result.invocation, { body: JSON.stringify({ foo: 'bar' }) }) const secondResult = executor.execute(result.invocation) logs.push(...secondResult.logs) @@ -249,16 +247,14 @@ describe('Hog Executor', () => { // Start the function const result1 = executor.execute(invocation) // Run the response one time simulating a successful fetch - result1.invocation.queue = 'hog' - result1.invocation.queueParameters = createAsyncFunctionResponse() + setupFetchResponse(result1.invocation) const result2 = executor.execute(result1.invocation) expect(result2.finished).toBe(false) expect(result2.error).toBe(undefined) expect(result2.invocation.queue).toBe('fetch') // This time we should see an error for hitting the loop limit - result2.invocation.queue = 'hog' - result2.invocation.queueParameters = createAsyncFunctionResponse() + setupFetchResponse(result2.invocation) const result3 = executor.execute(result1.invocation) expect(result3.finished).toBe(false) expect(result3.error).toEqual('Exceeded maximum number of async steps: 2') From bb2b3e5eabe7adb4166f1138556283ede758ec10 Mon Sep 17 00:00:00 2001 From: Ben White Date: Wed, 4 Sep 2024 15:29:54 +0200 Subject: [PATCH 45/75] Fix tests --- .../cdp/cdp-processed-events-consumer.test.ts | 41 +------------------ 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts index 6e7040b4b1066..cbdac413a0f81 100644 --- a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts +++ b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts @@ -166,7 +166,7 @@ describe('CDP Processed Events Consumer', () => { matchInvocation(fnPrinterPageviewFilters, globals), ]) - expect(mockProducer.produce).toHaveBeenCalledTimes(2) + expect(mockProducer.produce).toHaveBeenCalledTimes(11) expect(decodeAllKafkaMessages()).toMatchObject([ { @@ -256,46 +256,7 @@ describe('CDP Processed Events Consumer', () => { }) }) - describe('kafka parsing', () => { - it('can parse incoming messages correctly', async () => { - await insertHogFunction({ - ...HOG_EXAMPLES.simple_fetch, - ...HOG_INPUTS_EXAMPLES.simple_fetch, - ...HOG_FILTERS_EXAMPLES.no_filters, - }) - // Create a message that should be processed by this function - // Run the function and check that it was executed - await processor._handleKafkaBatch([ - createMessage( - createIncomingEvent(team.id, { - uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', - event: '$pageview', - properties: JSON.stringify({ - $lib_version: '1.0.0', - }), - }) - ), - ]) - - // Generall check that the message seemed to get processed - expect(decodeAllKafkaMessages()).toMatchObject([ - { - key: expect.any(String), - topic: 'cdp_function_callbacks_test', - value: { - state: expect.any(String), - }, - waitForAck: true, - }, - ]) - }) - }) - describe('no delayed execution', () => { - // beforeEach(() => { - // hub.CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP = true - // }) - it('should invoke the initial function before enqueuing', async () => { await insertHogFunction({ ...HOG_EXAMPLES.simple_fetch, From 9e7bccad26e5c00761c0730d63dd0fc257ab47f9 Mon Sep 17 00:00:00 2001 From: Ben White Date: Wed, 4 Sep 2024 17:40:36 +0200 Subject: [PATCH 46/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 9 ++++----- plugin-server/src/cdp/hog-executor.ts | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index c17489ce84e54..22f7d5f62056b 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -224,7 +224,6 @@ abstract class CdpConsumerBase { // TODO: Add cyclotron check here and enqueue that way // For now we just enqueue to kafka // For kafka style this is overkill to enqueue this way but it simplifies migrating to the new system - const serializedInvocation = serializeHogFunctionInvocation(invocation) const request: HogFunctionInvocationSerializedCompressed = { @@ -748,12 +747,12 @@ export class CdpCyclotronWorker extends CdpConsumerBase { await Promise.all( invocations.map(async (item) => { const id = item.invocation.id - if (item.finished) { - status.debug('⚡️', 'Updating job to completed', id) - this.cyclotronWorker?.updateJob(id, 'completed') - } else if (item.error) { + if (item.error) { status.debug('⚡️', 'Updating job to failed', id) this.cyclotronWorker?.updateJob(id, 'failed') + } else if (item.finished) { + status.debug('⚡️', 'Updating job to completed', id) + this.cyclotronWorker?.updateJob(id, 'completed') } else { status.debug('⚡️', 'Updating job to available', id) this.cyclotronWorker?.updateJob(id, 'available', { diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index 684aaa1d3a953..3ecb3830cd724 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -374,6 +374,7 @@ export class HogExecutor { } } catch (err) { result.error = err.message + result.finished = true // Explicitly set to true to prevent infinite loops status.error( '🦔', `[HogExecutor] Error executing function ${invocation.hogFunction.id} - ${invocation.hogFunction.name}`, From 1311f64b439ce3e99f680e88f4f014f9bede7120 Mon Sep 17 00:00:00 2001 From: Ben White Date: Wed, 4 Sep 2024 17:53:58 +0200 Subject: [PATCH 47/75] Fix --- plugin-server/src/cdp/hog-executor.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index 3ecb3830cd724..a1e17cdce5fe2 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -159,9 +159,10 @@ export class HogExecutor { error, timings = [], } = invocation.queueParameters as HogFunctionQueueParametersFetchResponse + let responseBody: any = undefined if (response) { // Convert from buffer to string - response.body = invocation.queueBlob ? Buffer.from(invocation.queueBlob).toString() : undefined + responseBody = invocation.queueBlob ? Buffer.from(invocation.queueBlob).toString() : undefined } // Reset the queue parameters to be sure @@ -189,9 +190,9 @@ export class HogExecutor { throw new Error(error) } - if (typeof response?.body === 'string') { + if (typeof responseBody === 'string') { try { - response.body = JSON.parse(response.body) + responseBody = JSON.parse(responseBody) } catch (e) { // pass - if it isn't json we just pass it on } From e86ea4c66deaec6c3d61e39344a055728654b3ba Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 09:13:56 +0200 Subject: [PATCH 48/75] Fix --- plugin-server/src/cdp/cdp-consumers.ts | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 8d27c5c79052c..df27c1ddb32df 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -221,10 +221,13 @@ abstract class CdpConsumerBase { } protected async queueInvocationToKafka(invocation: HogFunctionInvocation) { - // TODO: Add cyclotron check here and enqueue that way - // For now we just enqueue to kafka - // For kafka style this is overkill to enqueue this way but it simplifies migrating to the new system - const serializedInvocation = serializeHogFunctionInvocation(invocation) + // NOTE: WE keep the queueParams args as kafka land still needs them + const serializedInvocation: HogFunctionInvocationSerialized = { + ...invocation, + hogFunctionId: invocation.hogFunction.id, + } + + delete (serializedInvocation as any).hogFunction const request: HogFunctionInvocationSerializedCompressed = { state: await gzipObject(serializedInvocation), From ac159d25fe93ba711a3b11e5a66baaf14a7b22b8 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 11:15:05 +0200 Subject: [PATCH 49/75] Fixes --- plugin-server/src/cdp/cdp-api.ts | 12 +++++++++--- plugin-server/src/cdp/hog-executor.ts | 9 ++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/plugin-server/src/cdp/cdp-api.ts b/plugin-server/src/cdp/cdp-api.ts index 5b9b7f7532007..fb48faf31f15d 100644 --- a/plugin-server/src/cdp/cdp-api.ts +++ b/plugin-server/src/cdp/cdp-api.ts @@ -144,12 +144,18 @@ export class CdpApi { if (invocation.queue === 'fetch') { if (mock_async_functions) { // Add the state, simulating what executeAsyncResponse would do + + const fakeFetchResponse = { + status: 200, + body: '{}', + } + response = { invocation: { ...invocation, queue: 'hog', - queueParameters: { response: { status: 200 } }, - queueBlob: Buffer.from('{}'), + queueParameters: { response: { status: fakeFetchResponse.status } }, + queueBlob: Buffer.from(fakeFetchResponse.body), }, finished: false, logs: [ @@ -161,7 +167,7 @@ export class CdpApi { { level: 'info', timestamp: DateTime.now(), - message: `fetch(${JSON.stringify(invocation.queueParameters, null, 2)})`, + message: `fetch(${JSON.stringify(fakeFetchResponse, null, 2)})`, }, ], } diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index a1e17cdce5fe2..2d8dd2e3b145e 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -153,6 +153,7 @@ export class HogExecutor { try { // If the queueParameter is set then we have an expected format that we want to parse and add to the stack if (invocation.queueParameters) { + // NOTE: This is all based around the only response type being fetch currently const { logs = [], response = null, @@ -198,8 +199,14 @@ export class HogExecutor { } } + // Finally we create the response object as the VM expects + const fetchResponse = { + status, + body: responseBody, + } + // Add the response to the stack to continue execution - invocation.vmState!.stack.push(response) + invocation.vmState!.stack.push(fetchResponse) invocation.timings.push(...timings) result.logs = [...logs, ...result.logs] } From 04a2e902f0e9b3ec54eeb40067c940e1b988aa94 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 11:16:43 +0200 Subject: [PATCH 50/75] Fixes --- plugin-server/src/cdp/cdp-api.ts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/plugin-server/src/cdp/cdp-api.ts b/plugin-server/src/cdp/cdp-api.ts index fb48faf31f15d..4c40a8376c983 100644 --- a/plugin-server/src/cdp/cdp-api.ts +++ b/plugin-server/src/cdp/cdp-api.ts @@ -145,17 +145,18 @@ export class CdpApi { if (mock_async_functions) { // Add the state, simulating what executeAsyncResponse would do - const fakeFetchResponse = { - status: 200, - body: '{}', + // Re-parse the fetch args for the logging + const fetchArgs = { + ...invocation.queueParameters, + body: invocation.queueBlob?.toString(), } response = { invocation: { ...invocation, queue: 'hog', - queueParameters: { response: { status: fakeFetchResponse.status } }, - queueBlob: Buffer.from(fakeFetchResponse.body), + queueParameters: { response: { status: 200 } }, + queueBlob: Buffer.from('{}'), }, finished: false, logs: [ @@ -167,7 +168,7 @@ export class CdpApi { { level: 'info', timestamp: DateTime.now(), - message: `fetch(${JSON.stringify(fakeFetchResponse, null, 2)})`, + message: `fetch(${JSON.stringify(fetchArgs, null, 2)})`, }, ], } From af18caa0c7e87b028acfadc55ff3b0da09cf9f02 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 11:29:19 +0200 Subject: [PATCH 51/75] Removed old test --- .../tests/cdp/cdp-consumer.e2e.test.ts | 222 ------------------ 1 file changed, 222 deletions(-) delete mode 100644 plugin-server/tests/cdp/cdp-consumer.e2e.test.ts diff --git a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts b/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts deleted file mode 100644 index 8d6581aef9ef0..0000000000000 --- a/plugin-server/tests/cdp/cdp-consumer.e2e.test.ts +++ /dev/null @@ -1,222 +0,0 @@ -import { CdpFunctionCallbackConsumer, CdpProcessedEventsConsumer } from '../../src/cdp/cdp-consumers' -import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' -import { Hub, Team } from '../../src/types' -import { closeHub, createHub } from '../../src/utils/db/hub' -import { getFirstTeam, resetTestDatabase } from '../helpers/sql' -import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' -import { createHogExecutionGlobals, insertHogFunction as _insertHogFunction } from './fixtures' - -const mockConsumer = { - on: jest.fn(), - commitSync: jest.fn(), - commit: jest.fn(), - queryWatermarkOffsets: jest.fn(), - committed: jest.fn(), - assignments: jest.fn(), - isConnected: jest.fn(() => true), - getMetadata: jest.fn(), -} - -jest.mock('../../src/kafka/batch-consumer', () => { - return { - startBatchConsumer: jest.fn(() => - Promise.resolve({ - join: () => ({ - finally: jest.fn(), - }), - stop: jest.fn(), - consumer: mockConsumer, - }) - ), - } -}) - -jest.mock('../../src/utils/fetch', () => { - return { - trackedFetch: jest.fn(() => - Promise.resolve({ - status: 200, - text: () => Promise.resolve(JSON.stringify({ success: true })), - json: () => Promise.resolve({ success: true }), - }) - ), - } -}) - -jest.mock('../../src/utils/db/kafka-producer-wrapper', () => { - const mockKafkaProducer = { - producer: { - connect: jest.fn(), - }, - disconnect: jest.fn(), - produce: jest.fn(() => Promise.resolve()), - } - return { - KafkaProducerWrapper: jest.fn(() => mockKafkaProducer), - } -}) - -const mockFetch: jest.Mock = require('../../src/utils/fetch').trackedFetch - -const mockProducer = require('../../src/utils/db/kafka-producer-wrapper').KafkaProducerWrapper() - -jest.setTimeout(1000) - -const decodeKafkaMessage = (message: any): any => { - return { - ...message, - value: JSON.parse(message.value.toString()), - } -} - -const decodeAllKafkaMessages = (): any[] => { - return mockProducer.produce.mock.calls.map((x) => decodeKafkaMessage(x[0])) -} - -const convertToKafkaMessage = (message: any): any => { - return { - ...message, - value: Buffer.from(JSON.stringify(message.value)), - } -} - -/** - * NOTE: This isn't fully e2e... We still mock kafka but we trigger one queue from the other in a loop - */ -describe('CDP Consumers E2E', () => { - let processedEventsConsumer: CdpProcessedEventsConsumer - let functionProcessor: CdpFunctionCallbackConsumer - let hub: Hub - let team: Team - - const insertHogFunction = async (hogFunction: Partial) => { - const item = await _insertHogFunction(hub.postgres, team.id, hogFunction) - // Trigger the reload that django would do - await processedEventsConsumer.hogFunctionManager.reloadAllHogFunctions() - await functionProcessor.hogFunctionManager.reloadAllHogFunctions() - return item - } - - beforeEach(async () => { - await resetTestDatabase() - hub = await createHub() - team = await getFirstTeam(hub) - - processedEventsConsumer = new CdpProcessedEventsConsumer(hub) - await processedEventsConsumer.start() - functionProcessor = new CdpFunctionCallbackConsumer(hub) - await functionProcessor.start() - - mockFetch.mockClear() - }) - - afterEach(async () => { - jest.setTimeout(10000) - await processedEventsConsumer.stop() - await functionProcessor.stop() - await closeHub(hub) - }) - - afterAll(() => { - jest.useRealTimers() - }) - - describe('e2e fetch function', () => { - /** - * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios - */ - - let fnFetchNoFilters: HogFunctionType - let globals: HogFunctionInvocationGlobals - - let kafkaMessages = { - metrics: [] as any[], - logs: [] as any[], - invocations: [] as any[], - } - - beforeEach(async () => { - fnFetchNoFilters = await insertHogFunction({ - ...HOG_EXAMPLES.simple_fetch, - ...HOG_INPUTS_EXAMPLES.simple_fetch, - ...HOG_FILTERS_EXAMPLES.no_filters, - }) - - globals = createHogExecutionGlobals({ - project: { - id: team.id, - } as any, - event: { - uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', - name: '$pageview', - properties: { - $current_url: 'https://posthog.com', - $lib_version: '1.0.0', - }, - } as any, - }) - - kafkaMessages = { - metrics: [], - logs: [], - invocations: [], - } - }) - - const gatherProducedMessages = () => { - const allMessages = decodeAllKafkaMessages() - - allMessages.forEach((message) => { - if (message.topic === 'clickhouse_app_metrics2_test') { - kafkaMessages.metrics.push(message) - } else if (message.topic === 'log_entries_test') { - kafkaMessages.logs.push(message) - } else if (message.topic === 'cdp_function_callbacks_test') { - kafkaMessages.invocations.push(message) - } else { - throw new Error(`Unknown topic: ${message.topic}`) - } - }) - - mockProducer.produce.mockClear() - } - - it('should invoke a function via kafka transportation until completed', async () => { - // NOTE: We can skip kafka as the entry point - const invocations = await processedEventsConsumer.processBatch([globals]) - expect(invocations).toHaveLength(1) - gatherProducedMessages() - - expect(kafkaMessages.invocations).toHaveLength(1) - expect(kafkaMessages.invocations[0].topic).toEqual('cdp_function_callbacks_test') - mockProducer.produce.mockClear() - - while (kafkaMessages.invocations.length) { - await functionProcessor._handleKafkaBatch([convertToKafkaMessage(kafkaMessages.invocations[0])]) - kafkaMessages.invocations = [] - gatherProducedMessages() - } - - expect(kafkaMessages.metrics).toMatchObject([ - { - key: fnFetchNoFilters.id.toString(), - value: { - app_source: 'hog_function', - app_source_id: fnFetchNoFilters.id.toString(), - count: 1, - metric_kind: 'success', - metric_name: 'succeeded', - team_id: 2, - }, - }, - ]) - expect(kafkaMessages.logs.map((x) => x.value.message)).toEqual([ - 'Executing function', - "Suspending function due to async function call 'fetch'. Payload: 1902 bytes", - 'Resuming function', - 'Fetch response:, {"status":200,"body":{"success":true}}', - expect.stringContaining('Function completed'), - ]) - }) - }) -}) From bfbf8a0f00edc74563545abc92ae53f853e06534 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 11:51:52 +0200 Subject: [PATCH 52/75] Fixes --- plugin-server/src/cdp/cdp-api.ts | 4 ++-- plugin-server/src/cdp/fetch-executor.ts | 10 +++++----- plugin-server/src/cdp/hog-executor.ts | 12 +++++++----- plugin-server/src/cdp/utils.ts | 4 ++++ plugin-server/tests/cdp/hog-executor.test.ts | 4 ++-- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/plugin-server/src/cdp/cdp-api.ts b/plugin-server/src/cdp/cdp-api.ts index 4c40a8376c983..cfc70e7f1b8fc 100644 --- a/plugin-server/src/cdp/cdp-api.ts +++ b/plugin-server/src/cdp/cdp-api.ts @@ -9,7 +9,7 @@ import { HogExecutor } from './hog-executor' import { HogFunctionManager } from './hog-function-manager' import { HogWatcher, HogWatcherState } from './hog-watcher' import { HogFunctionInvocationResult, HogFunctionType, LogEntry } from './types' -import { createInvocation } from './utils' +import { createInvocation, queueBlobToString } from './utils' export class CdpApi { private hogExecutor: HogExecutor @@ -148,7 +148,7 @@ export class CdpApi { // Re-parse the fetch args for the logging const fetchArgs = { ...invocation.queueParameters, - body: invocation.queueBlob?.toString(), + body: queueBlobToString(invocation.queueBlob), } response = { diff --git a/plugin-server/src/cdp/fetch-executor.ts b/plugin-server/src/cdp/fetch-executor.ts index 6ab71f613f198..e9fa2a4ffbb8c 100644 --- a/plugin-server/src/cdp/fetch-executor.ts +++ b/plugin-server/src/cdp/fetch-executor.ts @@ -12,7 +12,7 @@ import { HogFunctionQueueParametersFetchRequest, HogFunctionQueueParametersFetchResponse, } from './types' -import { gzipObject, serializeHogFunctionInvocation } from './utils' +import { gzipObject, queueBlobToString, serializeHogFunctionInvocation } from './utils' export const BUCKETS_KB_WRITTEN = [0, 128, 512, 1024, 2024, 4096, 10240, Infinity] @@ -40,13 +40,13 @@ export class FetchExecutor { async execute(invocation: HogFunctionInvocation): Promise { if (invocation.queue !== 'fetch' || !invocation.queueParameters) { - throw new Error('Bad invocation') + // throw new Error('Bad invocation') + return } const params = invocation.queueParameters as HogFunctionQueueParametersFetchRequest - const blob = invocation.queueBlob - const body = blob ? blob.toString() : undefined + const body = queueBlobToString(invocation.queueBlob) if (body) { histogramFetchPayloadSize.observe(body.length / 1024) } @@ -92,7 +92,7 @@ export class FetchExecutor { } const params = invocation.queueParameters as HogFunctionQueueParametersFetchRequest - const body = invocation.queueBlob ? invocation.queueBlob.toString() : undefined + const body = queueBlobToString(invocation.queueBlob) || '' let responseBody = '' const resParams: HogFunctionQueueParametersFetchResponse = { diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index 2d8dd2e3b145e..28bad8e38099a 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -14,7 +14,7 @@ import { HogFunctionQueueParametersFetchResponse, HogFunctionType, } from './types' -import { convertToHogFunctionFilterGlobal } from './utils' +import { convertToHogFunctionFilterGlobal, queueBlobToString } from './utils' const MAX_ASYNC_STEPS = 2 const MAX_HOG_LOGS = 10 @@ -163,7 +163,7 @@ export class HogExecutor { let responseBody: any = undefined if (response) { // Convert from buffer to string - responseBody = invocation.queueBlob ? Buffer.from(invocation.queueBlob).toString() : undefined + responseBody = queueBlobToString(invocation.queueBlob) } // Reset the queue parameters to be sure @@ -341,16 +341,18 @@ export class HogExecutor { const headers = fetchOptions?.headers || { 'Content-Type': 'application/json', } - let body = fetchOptions?.body // Modify the body to ensure it is a string (we allow Hog to send an object to keep things simple) - body = body ? (typeof body === 'string' ? body : JSON.stringify(body)) : body + const body: string | undefined = fetchOptions?.body + ? typeof fetchOptions.body === 'string' + ? fetchOptions.body + : JSON.stringify(fetchOptions.body) + : fetchOptions?.body result.invocation.queue = 'fetch' result.invocation.queueParameters = { url, method, headers, - // body, return_queue: 'hog', } // The payload is always blob encoded diff --git a/plugin-server/src/cdp/utils.ts b/plugin-server/src/cdp/utils.ts index b6ad78d732efb..db1884c8c8a69 100644 --- a/plugin-server/src/cdp/utils.ts +++ b/plugin-server/src/cdp/utils.ts @@ -240,3 +240,7 @@ export function serializeHogFunctionInvocation(invocation: HogFunctionInvocation return serializedInvocation } + +export function queueBlobToString(blob?: HogFunctionInvocation["queueBlob"]): string | undefined { + return blob ? Buffer.from(blob).toString('utf-8') : undefined +} \ No newline at end of file diff --git a/plugin-server/tests/cdp/hog-executor.test.ts b/plugin-server/tests/cdp/hog-executor.test.ts index 5d22f63bea51a..2fc0f9f78cc4b 100644 --- a/plugin-server/tests/cdp/hog-executor.test.ts +++ b/plugin-server/tests/cdp/hog-executor.test.ts @@ -134,7 +134,7 @@ describe('Hog Executor', () => { }, }) - const body = JSON.parse(result.invocation.queueBlob!.toString()) + const body = JSON.parse(Buffer.from(result.invocation.queueBlob!).toString()) expect(body).toEqual({ event: { uuid: 'uuid', @@ -256,7 +256,7 @@ describe('Hog Executor', () => { // This time we should see an error for hitting the loop limit setupFetchResponse(result2.invocation) const result3 = executor.execute(result1.invocation) - expect(result3.finished).toBe(false) + expect(result3.finished).toBe(true) expect(result3.error).toEqual('Exceeded maximum number of async steps: 2') expect(result3.logs.map((log) => log.message)).toEqual([ 'Resuming function', From cef34d9d83c07099398c7409f6724b405a17a541 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 12:13:30 +0200 Subject: [PATCH 53/75] Fixes --- .../cdp/cdp-processed-events-consumer.test.ts | 141 ++++++++++-------- 1 file changed, 81 insertions(+), 60 deletions(-) diff --git a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts index cbdac413a0f81..711d5a627a2b2 100644 --- a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts +++ b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts @@ -167,22 +167,88 @@ describe('CDP Processed Events Consumer', () => { ]) expect(mockProducer.produce).toHaveBeenCalledTimes(11) - expect(decodeAllKafkaMessages()).toMatchObject([ { - key: expect.any(String), - topic: 'cdp_function_callbacks_test', + topic: 'log_entries_test', value: { - state: expect.any(String), + message: 'Executing function', + log_source_id: fnFetchNoFilters.id, + }, + }, + { + topic: 'log_entries_test', + value: { + message: "Suspending function due to async function call 'fetch'. Payload: 1902 bytes", + log_source_id: fnFetchNoFilters.id, + }, + }, + { + topic: 'clickhouse_app_metrics2_test', + value: { + app_source: 'hog_function', + team_id: 2, + app_source_id: fnPrinterPageviewFilters.id, + metric_kind: 'success', + metric_name: 'succeeded', + count: 1, + }, + }, + { + topic: 'log_entries_test', + value: { + message: 'Executing function', + log_source_id: fnPrinterPageviewFilters.id, + }, + }, + { + topic: 'log_entries_test', + value: { + message: 'test', + log_source_id: fnPrinterPageviewFilters.id, + }, + }, + { + topic: 'log_entries_test', + value: { + message: '{"nested":{"foo":"***REDACTED***","bool":false,"null":null}}', + log_source_id: fnPrinterPageviewFilters.id, + }, + }, + { + topic: 'log_entries_test', + value: { + message: '{"foo":"***REDACTED***","bool":false,"null":null}', + log_source_id: fnPrinterPageviewFilters.id, + }, + }, + { + topic: 'log_entries_test', + value: { + message: 'substring: ***REDACTED***', + log_source_id: fnPrinterPageviewFilters.id, + }, + }, + { + topic: 'log_entries_test', + value: { + message: + '{"input_1":"test","secret_input_2":{"foo":"***REDACTED***","bool":false,"null":null},"secret_input_3":"***REDACTED***"}', + log_source_id: fnPrinterPageviewFilters.id, + }, + }, + { + topic: 'log_entries_test', + value: { + message: expect.stringContaining('Function completed'), + log_source_id: fnPrinterPageviewFilters.id, }, - waitForAck: true, }, { - key: expect.any(String), topic: 'cdp_function_callbacks_test', value: { state: expect.any(String), }, + key: expect.stringContaining(fnFetchNoFilters.id.toString()), waitForAck: true, }, ]) @@ -195,7 +261,9 @@ describe('CDP Processed Events Consumer', () => { expect(invocations).toHaveLength(1) expect(invocations).toMatchObject([matchInvocation(fnFetchNoFilters, globals)]) - expect(mockProducer.produce).toHaveBeenCalledTimes(2) + expect(mockProducer.produce).toHaveBeenCalledTimes(4) + + console.log(decodeAllKafkaMessages()) expect(decodeAllKafkaMessages()).toMatchObject([ { @@ -211,6 +279,12 @@ describe('CDP Processed Events Consumer', () => { timestamp: expect.any(String), }, }, + { + topic: 'log_entries_test', + }, + { + topic: 'log_entries_test', + }, { topic: 'cdp_function_callbacks_test', }, @@ -255,58 +329,5 @@ describe('CDP Processed Events Consumer', () => { ]) }) }) - - describe('no delayed execution', () => { - it('should invoke the initial function before enqueuing', async () => { - await insertHogFunction({ - ...HOG_EXAMPLES.simple_fetch, - ...HOG_INPUTS_EXAMPLES.simple_fetch, - ...HOG_FILTERS_EXAMPLES.no_filters, - }) - // Create a message that should be processed by this function - // Run the function and check that it was executed - await processor._handleKafkaBatch([ - createMessage( - createIncomingEvent(team.id, { - uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', - event: '$pageview', - properties: JSON.stringify({ - $lib_version: '1.0.0', - }), - }) - ), - ]) - - // General check that the message seemed to get processed - expect(decodeAllKafkaMessages()).toMatchObject([ - { - key: expect.any(String), - topic: 'log_entries_test', - value: { - message: 'Executing function', - }, - waitForAck: true, - }, - { - key: expect.any(String), - topic: 'log_entries_test', - value: { - message: expect.stringContaining( - "Suspending function due to async function call 'fetch'. Payload" - ), - }, - waitForAck: true, - }, - { - key: expect.any(String), - topic: 'cdp_function_callbacks_test', - value: { - state: expect.any(String), - }, - waitForAck: true, - }, - ]) - }) - }) }) }) From aac54afd3410c6665144dc213c20fc7eee1664e7 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 12:33:24 +0200 Subject: [PATCH 54/75] Fixes --- plugin-server/src/cdp/utils.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin-server/src/cdp/utils.ts b/plugin-server/src/cdp/utils.ts index db1884c8c8a69..82ef922b5e230 100644 --- a/plugin-server/src/cdp/utils.ts +++ b/plugin-server/src/cdp/utils.ts @@ -241,6 +241,6 @@ export function serializeHogFunctionInvocation(invocation: HogFunctionInvocation return serializedInvocation } -export function queueBlobToString(blob?: HogFunctionInvocation["queueBlob"]): string | undefined { +export function queueBlobToString(blob?: HogFunctionInvocation['queueBlob']): string | undefined { return blob ? Buffer.from(blob).toString('utf-8') : undefined -} \ No newline at end of file +} From 0ee8ba1651de309e4c5d1ca6fb7c659e3dab9563 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 12:35:24 +0200 Subject: [PATCH 55/75] Fixes --- plugin-server/tests/cdp/helpers/kafka-observer.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plugin-server/tests/cdp/helpers/kafka-observer.ts b/plugin-server/tests/cdp/helpers/kafka-observer.ts index 5808fa0ca3cda..91af486758c3b 100644 --- a/plugin-server/tests/cdp/helpers/kafka-observer.ts +++ b/plugin-server/tests/cdp/helpers/kafka-observer.ts @@ -1,5 +1,6 @@ import { KafkaConsumer, Message } from 'node-rdkafka' +import { createAdminClient, ensureTopicExists } from '../../../src/kafka/admin' import { createRdConnectionConfigFromEnvVars } from '../../../src/kafka/config' import { createKafkaConsumer } from '../../../src/kafka/consumer' import { Hub } from '../../../src/types' @@ -21,6 +22,9 @@ export const createKafkaObserver = async (hub: Hub, topics: string[]): Promise ensureTopicExists(adminClient, topic, 1000))) + consumer.connect() consumer.subscribe(topics) const messages: { From 64aa1e280a547cf7673259e2c5164ce10eb808c4 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 12:51:02 +0200 Subject: [PATCH 56/75] Fixes --- .../tests/cdp/cdp-processed-events-consumer.test.ts | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts index 711d5a627a2b2..1b5f266d88b5b 100644 --- a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts +++ b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts @@ -5,12 +5,7 @@ import { Hub, Team } from '../../src/types' import { closeHub, createHub } from '../../src/utils/db/hub' import { getFirstTeam, resetTestDatabase } from '../helpers/sql' import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' -import { - createHogExecutionGlobals, - createIncomingEvent, - createMessage, - insertHogFunction as _insertHogFunction, -} from './fixtures' +import { createHogExecutionGlobals, insertHogFunction as _insertHogFunction } from './fixtures' const mockConsumer = { on: jest.fn(), From 4a53ac132201e0a8503dda5c0bc497a15efe936d Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 12:55:40 +0200 Subject: [PATCH 57/75] Fixes --- rust/bin/migrate-cyclotron | 2 -- 1 file changed, 2 deletions(-) diff --git a/rust/bin/migrate-cyclotron b/rust/bin/migrate-cyclotron index 902485150750c..91fa85c05fc0b 100755 --- a/rust/bin/migrate-cyclotron +++ b/rust/bin/migrate-cyclotron @@ -4,7 +4,5 @@ SCRIPT_DIR=$(dirname "$(readlink -f "$0")") export DATABASE_NAME=${DATABASE_NAME:-cyclotron} export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} -echo "Script dir: $SCRIPT_DIR" - sqlx database create -D "$DATABASE_URL" sqlx migrate run -D "$DATABASE_URL" --source $SCRIPT_DIR/../cyclotron-core/migrations From 4e9ea89ae7282c31d0f67f7890ae258f8e899f00 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 13:01:59 +0200 Subject: [PATCH 58/75] Fix --- rust/bin/migrate-cyclotron | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/bin/migrate-cyclotron b/rust/bin/migrate-cyclotron index 91fa85c05fc0b..c42b567631053 100755 --- a/rust/bin/migrate-cyclotron +++ b/rust/bin/migrate-cyclotron @@ -4,5 +4,7 @@ SCRIPT_DIR=$(dirname "$(readlink -f "$0")") export DATABASE_NAME=${DATABASE_NAME:-cyclotron} export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} +echo "Performing cyclotron migrations for $DATABASE_URL" + sqlx database create -D "$DATABASE_URL" sqlx migrate run -D "$DATABASE_URL" --source $SCRIPT_DIR/../cyclotron-core/migrations From 2951d9b1d5bc42d1b3501bff3037a2b3227ccf40 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 13:10:06 +0200 Subject: [PATCH 59/75] Fix --- rust/bin/migrate-cyclotron | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/bin/migrate-cyclotron b/rust/bin/migrate-cyclotron index c42b567631053..281f7c90113f1 100755 --- a/rust/bin/migrate-cyclotron +++ b/rust/bin/migrate-cyclotron @@ -1,10 +1,10 @@ #!/bin/sh SCRIPT_DIR=$(dirname "$(readlink -f "$0")") -export DATABASE_NAME=${DATABASE_NAME:-cyclotron} -export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} +DATABASE_NAME=${DATABASE_NAME:-cyclotron} +DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} -echo "Performing cyclotron migrations for $DATABASE_URL" +echo "Performing cyclotron migrations for $DATABASE_URL (DATABASE_NAME=$DATABASE_NAME)" sqlx database create -D "$DATABASE_URL" sqlx migrate run -D "$DATABASE_URL" --source $SCRIPT_DIR/../cyclotron-core/migrations From 1fd7b9987b521ad53f39f311461ae72e19cc1dfd Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 13:18:20 +0200 Subject: [PATCH 60/75] Fix --- .github/workflows/ci-plugin-server.yml | 8 ++++++++ plugin-server/package.json | 2 +- rust/bin/migrate-cyclotron | 10 +++++----- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml index 0104b65338863..59d0da81f97eb 100644 --- a/.github/workflows/ci-plugin-server.yml +++ b/.github/workflows/ci-plugin-server.yml @@ -121,6 +121,14 @@ jobs: - name: Install rust uses: dtolnay/rust-toolchain@1.77 + - uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + rust/target + key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.lock') }} + - name: Install sqlx-cli working-directory: rust run: cargo install sqlx-cli --no-default-features --features native-tls,postgres diff --git a/plugin-server/package.json b/plugin-server/package.json index ccc070bc3a01a..b30a8505f52b4 100644 --- a/plugin-server/package.json +++ b/plugin-server/package.json @@ -24,7 +24,7 @@ "prepublishOnly": "pnpm build", "setup:dev:clickhouse": "cd .. && DEBUG=1 python manage.py migrate_clickhouse", "setup:test": "cd .. && TEST=1 python manage.py setup_test_environment && cd plugin-server && pnpm run setup:test:cyclotron", - "setup:test:cyclotron": "DATABASE_NAME=test_cyclotron ../rust/bin/migrate-cyclotron", + "setup:test:cyclotron": "CYCLOTRON_DATABASE_NAME=test_cyclotron ../rust/bin/migrate-cyclotron", "services:start": "cd .. && docker compose -f docker-compose.dev.yml up", "services:stop": "cd .. && docker compose -f docker-compose.dev.yml down", "services:clean": "cd .. && docker compose -f docker-compose.dev.yml rm -v", diff --git a/rust/bin/migrate-cyclotron b/rust/bin/migrate-cyclotron index 281f7c90113f1..cde8d8b4d65fc 100755 --- a/rust/bin/migrate-cyclotron +++ b/rust/bin/migrate-cyclotron @@ -1,10 +1,10 @@ #!/bin/sh SCRIPT_DIR=$(dirname "$(readlink -f "$0")") -DATABASE_NAME=${DATABASE_NAME:-cyclotron} -DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$DATABASE_NAME} +CYCLOTRON_DATABASE_NAME=${CYCLOTRON_DATABASE_NAME:-cyclotron} +CYCLOTRON_DATABASE_URL=${CYCLOTRON_DATABASE_URL:-postgres://posthog:posthog@localhost:5432/$CYCLOTRON_DATABASE_NAME} -echo "Performing cyclotron migrations for $DATABASE_URL (DATABASE_NAME=$DATABASE_NAME)" +echo "Performing cyclotron migrations for $CYCLOTRON_DATABASE_URL (DATABASE_NAME=$CYCLOTRON_DATABASE_NAME)" -sqlx database create -D "$DATABASE_URL" -sqlx migrate run -D "$DATABASE_URL" --source $SCRIPT_DIR/../cyclotron-core/migrations +sqlx database create -D "$CYCLOTRON_DATABASE_URL" +sqlx migrate run -D "$CYCLOTRON_DATABASE_URL" --source $SCRIPT_DIR/../cyclotron-core/migrations From 158af77463b5f79f89098977bfe0527df4356270 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 13:29:15 +0200 Subject: [PATCH 61/75] Fixes --- plugin-server/src/cdp/cdp-consumers.ts | 3 --- plugin-server/tests/cdp/cdp-e2e.test.ts | 2 +- plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts | 2 -- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index df27c1ddb32df..b98c8d2a5d7be 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -368,8 +368,6 @@ abstract class CdpConsumerBase { } public isHealthy() { - // TODO: Check either kafka consumer or cyclotron worker exists - // and that whatever exists is healthy return this.batchConsumer?.isHealthy() } } @@ -801,7 +799,6 @@ export class CdpCyclotronWorker extends CdpConsumerBase { const parsedState = job.vmState as HogFunctionInvocationSerialized - // TODO: Should ID come from the job or the state? invocations.push({ id: job.id, globals: parsedState.globals, diff --git a/plugin-server/tests/cdp/cdp-e2e.test.ts b/plugin-server/tests/cdp/cdp-e2e.test.ts index 8ad3dbcb4e578..7795345fe8376 100644 --- a/plugin-server/tests/cdp/cdp-e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-e2e.test.ts @@ -100,7 +100,7 @@ describe('CDP E2E', () => { kafkaObserver?.stop(), cyclotronWorker?.stop(), cyclotronFetchWorker?.stop(), - ]) + ]).catch(console.error) await closeHub(hub) }) diff --git a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts index 1b5f266d88b5b..11806c8595a10 100644 --- a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts +++ b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts @@ -258,8 +258,6 @@ describe('CDP Processed Events Consumer', () => { expect(invocations).toMatchObject([matchInvocation(fnFetchNoFilters, globals)]) expect(mockProducer.produce).toHaveBeenCalledTimes(4) - console.log(decodeAllKafkaMessages()) - expect(decodeAllKafkaMessages()).toMatchObject([ { key: expect.any(String), From 8302af517fb7f7602d07f599b72c34ed39851dd4 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 13:46:05 +0200 Subject: [PATCH 62/75] Fix tests --- plugin-server/tests/cdp/hog-function-manager.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/plugin-server/tests/cdp/hog-function-manager.test.ts b/plugin-server/tests/cdp/hog-function-manager.test.ts index 1624999c93058..3f34fcb4fe378 100644 --- a/plugin-server/tests/cdp/hog-function-manager.test.ts +++ b/plugin-server/tests/cdp/hog-function-manager.test.ts @@ -81,6 +81,7 @@ describe('HogFunctionManager', () => { }) afterEach(async () => { + await manager.stop() await closeHub(hub) }) From d284e1d2fd46dcf9b6e18f652eae58e9ad2c8095 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 13:54:35 +0200 Subject: [PATCH 63/75] Fixes --- bin/migrate | 3 +++ plugin-server/src/cdp/fetch-executor.ts | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/migrate b/bin/migrate index bc7608fa7b4c8..bce5e525182dc 100755 --- a/bin/migrate +++ b/bin/migrate @@ -2,6 +2,9 @@ set -e SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +ls $SCRIPT_DIR +ls $SCRIPT_DIR/../rust +ls $SCRIPT_DIR/../rust/bin bash $SCRIPT_DIR/../rust/bin/migrate-cyclotron python manage.py migrate diff --git a/plugin-server/src/cdp/fetch-executor.ts b/plugin-server/src/cdp/fetch-executor.ts index e9fa2a4ffbb8c..8907fafc35239 100644 --- a/plugin-server/src/cdp/fetch-executor.ts +++ b/plugin-server/src/cdp/fetch-executor.ts @@ -40,7 +40,7 @@ export class FetchExecutor { async execute(invocation: HogFunctionInvocation): Promise { if (invocation.queue !== 'fetch' || !invocation.queueParameters) { - // throw new Error('Bad invocation') + status.error('🦔', `[HogExecutor] Bad invocation`, { invocation }) return } From 37c6c2d4a761bae6d8a11fe424fc3260325bbfdd Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 13:59:41 +0200 Subject: [PATCH 64/75] Fixes --- plugin-server/src/utils/status.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/plugin-server/src/utils/status.ts b/plugin-server/src/utils/status.ts index d4bb164bb25e5..0b6b8f26ca1c5 100644 --- a/plugin-server/src/utils/status.ts +++ b/plugin-server/src/utils/status.ts @@ -67,7 +67,14 @@ export class Status implements StatusBlueprint { const logMessage = `[${this.prompt}] ${icon} ${message}` if (!this.logger) { - throw new Error(`Logger has been closed! Cannot log: ${logMessage}`) + if (isProdEnv()) { + // This can throw on tests if the logger is closed. We don't really want tests to be bothered with this. + throw new Error(`Logger has been closed! Cannot log: ${logMessage}`) + } + console.log( + `Logger has been closed! Cannot log: ${logMessage}. Logging to console instead due to non-prod env.` + ) + return } if (extra instanceof Object) { this.logger[type]({ ...extra, msg: logMessage }) From c5682e82dc705d7a6e3563cc0215f0402c7f8918 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 14:01:25 +0200 Subject: [PATCH 65/75] Fixes --- plugin-server/tests/cdp/helpers/kafka-observer.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/plugin-server/tests/cdp/helpers/kafka-observer.ts b/plugin-server/tests/cdp/helpers/kafka-observer.ts index 91af486758c3b..f9f0c9c61f27d 100644 --- a/plugin-server/tests/cdp/helpers/kafka-observer.ts +++ b/plugin-server/tests/cdp/helpers/kafka-observer.ts @@ -24,6 +24,7 @@ export const createKafkaObserver = async (hub: Hub, topics: string[]): Promise ensureTopicExists(adminClient, topic, 1000))) + adminClient.disconnect() consumer.connect() consumer.subscribe(topics) From 4cdf0ce17e173e512ee86c63a3b5ead2a620a2b2 Mon Sep 17 00:00:00 2001 From: Ben White Date: Thu, 5 Sep 2024 16:07:59 +0200 Subject: [PATCH 66/75] fix --- bin/migrate | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/migrate b/bin/migrate index bce5e525182dc..2f2aa49ed749b 100755 --- a/bin/migrate +++ b/bin/migrate @@ -2,10 +2,10 @@ set -e SCRIPT_DIR=$(dirname "$(readlink -f "$0")") -ls $SCRIPT_DIR -ls $SCRIPT_DIR/../rust -ls $SCRIPT_DIR/../rust/bin -bash $SCRIPT_DIR/../rust/bin/migrate-cyclotron +# NOTE when running in docker, rust might not exist so we need to check for it +if [ -d "$SCRIPT_DIR/../rust" ]; then + bash $SCRIPT_DIR/../rust/bin/migrate-cyclotron +fi python manage.py migrate python manage.py migrate_clickhouse From 6909aebeec095770f6349fb7445dba1881dc1dd7 Mon Sep 17 00:00:00 2001 From: Ben White Date: Fri, 6 Sep 2024 14:26:17 +0200 Subject: [PATCH 67/75] Playing --- plugin-server/tests/cdp/cdp-e2e.test.ts | 30 +++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/plugin-server/tests/cdp/cdp-e2e.test.ts b/plugin-server/tests/cdp/cdp-e2e.test.ts index 7795345fe8376..dcdfdcf9954fc 100644 --- a/plugin-server/tests/cdp/cdp-e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-e2e.test.ts @@ -94,13 +94,29 @@ describe('CDP E2E', () => { }) afterEach(async () => { - await Promise.all([ - processedEventsConsumer?.stop(), - functionProcessor?.stop(), - kafkaObserver?.stop(), - cyclotronWorker?.stop(), - cyclotronFetchWorker?.stop(), - ]).catch(console.error) + console.log('AfterEach', { + processedEventsConsumer, + functionProcessor, + kafkaObserver, + cyclotronWorker, + cyclotronFetchWorker, + }) + + const stoppers = [ + processedEventsConsumer?.stop().then(() => console.log('Stopped processedEventsConsumer')), + , + functionProcessor?.stop().then(() => console.log('Stopped functionProcessor')), + , + kafkaObserver?.stop().then(() => console.log('Stopped kafkaObserver')), + , + cyclotronWorker?.stop().then(() => console.log('Stopped cyclotronWorker')), + , + cyclotronFetchWorker?.stop().then(() => console.log('Stopped cyclotronFetchWorker')), + , + ] + + await Promise.all(stoppers) + await closeHub(hub) }) From 6729b858e419ae83192e208b427be2e4fd0215c9 Mon Sep 17 00:00:00 2001 From: Ben White Date: Fri, 6 Sep 2024 14:27:51 +0200 Subject: [PATCH 68/75] Fix --- plugin-server/tests/cdp/cdp-e2e.test.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/plugin-server/tests/cdp/cdp-e2e.test.ts b/plugin-server/tests/cdp/cdp-e2e.test.ts index dcdfdcf9954fc..ed22b3d55134c 100644 --- a/plugin-server/tests/cdp/cdp-e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-e2e.test.ts @@ -104,15 +104,10 @@ describe('CDP E2E', () => { const stoppers = [ processedEventsConsumer?.stop().then(() => console.log('Stopped processedEventsConsumer')), - , functionProcessor?.stop().then(() => console.log('Stopped functionProcessor')), - , kafkaObserver?.stop().then(() => console.log('Stopped kafkaObserver')), - , cyclotronWorker?.stop().then(() => console.log('Stopped cyclotronWorker')), - , cyclotronFetchWorker?.stop().then(() => console.log('Stopped cyclotronFetchWorker')), - , ] await Promise.all(stoppers) From d118775eb76acc5d4098fad3dbe2f83e3aacedc5 Mon Sep 17 00:00:00 2001 From: Ben White Date: Fri, 6 Sep 2024 14:32:42 +0200 Subject: [PATCH 69/75] Fixes? --- .github/workflows/ci-plugin-server.yml | 2 +- plugin-server/tests/cdp/cdp-e2e.test.ts | 1 + plugin-server/tests/cdp/helpers/kafka-observer.ts | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml index 59d0da81f97eb..03888d2268bc7 100644 --- a/.github/workflows/ci-plugin-server.yml +++ b/.github/workflows/ci-plugin-server.yml @@ -131,7 +131,7 @@ jobs: - name: Install sqlx-cli working-directory: rust - run: cargo install sqlx-cli --no-default-features --features native-tls,postgres + run: cargo install sqlx-cli@0.7.3 --no-default-features --features native-tls,postgres - name: Install SAML (python3-saml) dependencies run: | diff --git a/plugin-server/tests/cdp/cdp-e2e.test.ts b/plugin-server/tests/cdp/cdp-e2e.test.ts index ed22b3d55134c..b5423459e284e 100644 --- a/plugin-server/tests/cdp/cdp-e2e.test.ts +++ b/plugin-server/tests/cdp/cdp-e2e.test.ts @@ -29,6 +29,7 @@ jest.mock('../../src/utils/fetch', () => { const mockFetch: jest.Mock = require('../../src/utils/fetch').trackedFetch describe('CDP E2E', () => { + jest.setTimeout(10000) describe.each(['kafka', 'cyclotron'])('e2e fetch call: %s', (mode) => { let processedEventsConsumer: CdpProcessedEventsConsumer let functionProcessor: CdpFunctionCallbackConsumer diff --git a/plugin-server/tests/cdp/helpers/kafka-observer.ts b/plugin-server/tests/cdp/helpers/kafka-observer.ts index f9f0c9c61f27d..462c06fc1e137 100644 --- a/plugin-server/tests/cdp/helpers/kafka-observer.ts +++ b/plugin-server/tests/cdp/helpers/kafka-observer.ts @@ -26,7 +26,7 @@ export const createKafkaObserver = async (hub: Hub, topics: string[]): Promise ensureTopicExists(adminClient, topic, 1000))) adminClient.disconnect() - consumer.connect() + await new Promise((res, rej) => consumer.connect({}, (err) => (err ? rej(err) : res()))) consumer.subscribe(topics) const messages: { topic: string From 8043ab769cb9bb6f710baf214a68d7271a208004 Mon Sep 17 00:00:00 2001 From: Ben White Date: Fri, 6 Sep 2024 14:43:16 +0200 Subject: [PATCH 70/75] Ditch cache --- .github/workflows/ci-plugin-server.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml index 03888d2268bc7..6d5ffa26ed72f 100644 --- a/.github/workflows/ci-plugin-server.yml +++ b/.github/workflows/ci-plugin-server.yml @@ -121,13 +121,13 @@ jobs: - name: Install rust uses: dtolnay/rust-toolchain@1.77 - - uses: actions/cache@v4 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - rust/target - key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.lock') }} + # - uses: actions/cache@v4 + # with: + # path: | + # ~/.cargo/registry + # ~/.cargo/git + # rust/target + # key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.lock') }} - name: Install sqlx-cli working-directory: rust From 1edd85f0ab5bd3e949c10134fdf781479749ea8e Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 9 Sep 2024 09:24:07 +0200 Subject: [PATCH 71/75] Disable for e2e testing --- plugin-server/src/config/config.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index 942b191b39508..6a26d341421c9 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -193,7 +193,9 @@ export function getDefaultConfig(): PluginsServerConfig { CDP_CYCLOTRON_BATCH_SIZE: 500, // Cyclotron - CYCLOTRON_DATABASE_URL: isTestEnv() + CYCLOTRON_DATABASE_URL: process.env.E2E_TESTING + ? '' + : isTestEnv() ? 'postgres://posthog:posthog@localhost:5432/test_cyclotron' : isDevEnv() ? 'postgres://posthog:posthog@localhost:5432/cyclotron' From 1ca3f6a3da66622a1aa45f8214c136ac8902efb0 Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 9 Sep 2024 10:05:34 +0200 Subject: [PATCH 72/75] Fix? --- .github/workflows/ci-plugin-server.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml index 6d5ffa26ed72f..03888d2268bc7 100644 --- a/.github/workflows/ci-plugin-server.yml +++ b/.github/workflows/ci-plugin-server.yml @@ -121,13 +121,13 @@ jobs: - name: Install rust uses: dtolnay/rust-toolchain@1.77 - # - uses: actions/cache@v4 - # with: - # path: | - # ~/.cargo/registry - # ~/.cargo/git - # rust/target - # key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.lock') }} + - uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + rust/target + key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.lock') }} - name: Install sqlx-cli working-directory: rust From 26120dc0a44a7e18383c4620cbfe90e924b49182 Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 9 Sep 2024 10:18:10 +0200 Subject: [PATCH 73/75] Fixes --- plugin-server/src/config/config.ts | 4 +--- plugin-server/src/main/pluginsServer.ts | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index 6a26d341421c9..942b191b39508 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -193,9 +193,7 @@ export function getDefaultConfig(): PluginsServerConfig { CDP_CYCLOTRON_BATCH_SIZE: 500, // Cyclotron - CYCLOTRON_DATABASE_URL: process.env.E2E_TESTING - ? '' - : isTestEnv() + CYCLOTRON_DATABASE_URL: isTestEnv() ? 'postgres://posthog:posthog@localhost:5432/test_cyclotron' : isDevEnv() ? 'postgres://posthog:posthog@localhost:5432/cyclotron' diff --git a/plugin-server/src/main/pluginsServer.ts b/plugin-server/src/main/pluginsServer.ts index 1ddaa8f214e02..ff1f46b82d338 100644 --- a/plugin-server/src/main/pluginsServer.ts +++ b/plugin-server/src/main/pluginsServer.ts @@ -465,14 +465,19 @@ export async function startPluginsServer( if (capabilities.cdpCyclotronWorker) { const hub = await setupHub() - const worker = new CdpCyclotronWorker(hub) - await worker.start() - services.push(worker.service) - - if (process.env.EXPERIMENTAL_CDP_FETCH_WORKER) { - const workerFetch = new CdpCyclotronWorkerFetch(hub) - await workerFetch.start() - services.push(workerFetch.service) + + if (!hub.CYCLOTRON_DATABASE_URL) { + status.error('💥', 'Cyclotron database URL not set.') + } else { + const worker = new CdpCyclotronWorker(hub) + await worker.start() + services.push(worker.service) + + if (process.env.EXPERIMENTAL_CDP_FETCH_WORKER) { + const workerFetch = new CdpCyclotronWorkerFetch(hub) + await workerFetch.start() + services.push(workerFetch.service) + } } } From 2fd90cf4b0f714f52ed907579fc0ee1e232c87fa Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 9 Sep 2024 16:40:52 +0200 Subject: [PATCH 74/75] Fix --- bin/start-cyclotron | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/start-cyclotron b/bin/start-cyclotron index bce25042f2870..2885390287c0f 100755 --- a/bin/start-cyclotron +++ b/bin/start-cyclotron @@ -12,7 +12,7 @@ export RUST_LOG=${DEBUG:-debug} SQLX_QUERY_LEVEL=${SQLX_QUERY_LEVEL:-warn} export RUST_LOG=$RUST_LOG,sqlx::query=$SQLX_QUERY_LEVEL -export DATABASE_URL=${DATABASE_URL:-postgres://posthog:posthog@localhost:5432/cyclotron} +export DATABASE_URL=${CYCLOTRON_DATABASE_URL:-postgres://posthog:posthog@localhost:5432/cyclotron} export ALLOW_INTERNAL_IPS=${ALLOW_INTERNAL_IPS:-true} ./target/debug/cyclotron-fetch & From 74e977eb8c209b24b8207e0b6753d7de64a57832 Mon Sep 17 00:00:00 2001 From: Ben White Date: Mon, 9 Sep 2024 16:42:03 +0200 Subject: [PATCH 75/75] Fix --- plugin-server/src/cdp/cdp-consumers.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index 219ea920e78fe..f75b2a23096e5 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -431,7 +431,7 @@ export class CdpProcessedEventsConsumer extends CdpConsumerBase { const invocationResults = await runInstrumentedFunction({ statsKey: `cdpConsumer.handleEachBatch.executeInvocations`, func: async () => { - const hogResults = await this.runManyWithHeartbeat(invocationsToBeQueued, (item) => + const hogResults = await this.runManyWithHeartbeat(kafkaInvocations, (item) => this.hogExecutor.execute(item) ) return [...hogResults]