From ee439fb88347daeafec79ace530f092cd87e9a70 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Thu, 24 Aug 2023 17:57:13 +0200 Subject: [PATCH 01/10] chore(plugin-server): Validate fetch hostnames --- plugin-server/package.json | 1 + plugin-server/pnpm-lock.yaml | 8 +++ plugin-server/src/utils/fetch.ts | 56 +++++++++++++++++++-- plugin-server/src/worker/ingestion/hooks.ts | 6 +-- plugin-server/src/worker/plugins/mmdb.ts | 2 +- plugin-server/src/worker/vm/imports.ts | 4 +- plugin-server/tests/utils/fetch.test.ts | 23 +++++++++ posthog/api/test/test_utils.py | 2 + posthog/api/utils.py | 10 +++- 9 files changed, 99 insertions(+), 13 deletions(-) create mode 100644 plugin-server/tests/utils/fetch.test.ts diff --git a/plugin-server/package.json b/plugin-server/package.json index 3183cf141dd40..ee564b675badc 100644 --- a/plugin-server/package.json +++ b/plugin-server/package.json @@ -125,6 +125,7 @@ "eslint-plugin-prettier": "^4.2.1", "eslint-plugin-promise": "^6.0.0", "eslint-plugin-simple-import-sort": "^7.0.0", + "ipaddr.js": "^2.1.0", "jest": "^28.1.1", "nodemon": "^2.0.22", "parse-prometheus-text-format": "^1.1.1", diff --git a/plugin-server/pnpm-lock.yaml b/plugin-server/pnpm-lock.yaml index 355ade1e4c556..25ff045dbe652 100644 --- a/plugin-server/pnpm-lock.yaml +++ b/plugin-server/pnpm-lock.yaml @@ -276,6 +276,9 @@ devDependencies: eslint-plugin-simple-import-sort: specifier: ^7.0.0 version: 7.0.0(eslint@8.39.0) + ipaddr.js: + specifier: ^2.1.0 + version: 2.1.0 jest: specifier: ^28.1.1 version: 28.1.3(@types/node@16.18.25)(ts-node@10.9.1) @@ -7365,6 +7368,11 @@ packages: resolution: {integrity: sha512-WKa+XuLG1A1R0UWhl2+1XQSi+fZWMsYKffMZTTYsiZaUD8k2yDAj5atimTUD2TZkyCkNEeYE5NhFZmupOGtjYQ==} dev: false + /ipaddr.js@2.1.0: + resolution: {integrity: sha512-LlbxQ7xKzfBusov6UMi4MFpEg0m+mAm9xyNGEduwXMEDuf4WfzB/RZwMVYEd7IKGvh4IUkEXYxtAVu9T3OelJQ==} + engines: {node: '>= 10'} + dev: true + /is-arguments@1.1.1: resolution: {integrity: sha512-8Q7EARjzEnKpt/PCD7e1cgUS0a6X8u5tdSiMqXhojOdoV9TsMsiO+9VLC5vAmO8N7/GmXn7yjR8qnA6bVAEzfA==} engines: {node: '>= 0.4'} diff --git a/plugin-server/src/utils/fetch.ts b/plugin-server/src/utils/fetch.ts index c45166edd0d1c..ce0f0c7b5529a 100644 --- a/plugin-server/src/utils/fetch.ts +++ b/plugin-server/src/utils/fetch.ts @@ -1,21 +1,67 @@ // This module wraps node-fetch with a sentry tracing-aware extension +import { LookupAddress } from 'dns' +import dns from 'dns/promises' +import * as ipaddr from 'ipaddr.js' import fetch, { FetchError, Request, Response } from 'node-fetch' +import { URL } from 'url' import { runInSpan } from '../sentry' -function fetchWrapper(...args: Parameters): Promise { +export function filteredFetch(...args: Parameters): Promise { const request = new Request(...args) return runInSpan( { op: 'fetch', description: `${request.method} ${request.url}`, }, - () => fetch(...args) + async () => { + await raiseIfUserProvidedUrlUnsafe(request.url) + return await fetch(...args) + } ) } -fetchWrapper.isRedirect = fetch.isRedirect -fetchWrapper.FetchError = FetchError +filteredFetch.isRedirect = fetch.isRedirect +filteredFetch.FetchError = FetchError -export default fetchWrapper +/** + * Raise if the provided URL seems unsafe, otherwise do nothing. + * + * Equivalent of Django raise_if_user_provided_url_unsafe. + */ +export async function raiseIfUserProvidedUrlUnsafe(url: string): Promise { + // Raise if the provided URL seems unsafe, otherwise do nothing. + let parsedUrl: URL + try { + parsedUrl = new URL(url) + } catch (err) { + throw new FetchError('Invalid URL', 'posthog-host-guard') + } + if (!parsedUrl.hostname) { + throw new FetchError('No hostname', 'posthog-host-guard') + } + let port: URL['port'] + if (parsedUrl.protocol === 'http:') { + port = '80' + } else if (parsedUrl.protocol === 'https:') { + port = '443' + } else { + throw new FetchError('Protocol must be either HTTP or HTTPS', 'posthog-host-guard') + } + if (parsedUrl.port.length > 0 && parsedUrl.port !== port) { + throw new FetchError('Port does not match protocol', 'posthog-host-guard') + } + let addrinfo: LookupAddress[] + try { + addrinfo = await dns.lookup(parsedUrl.hostname, { all: true }) + } catch (err) { + throw new FetchError('Invalid hostname', 'posthog-host-guard') + } + for (const { address } of addrinfo) { + // Prevent addressing internal services + if (ipaddr.parse(address).range() !== 'unicast') { + throw new FetchError('Invalid hostname', 'posthog-host-guard') + } + } +} diff --git a/plugin-server/src/worker/ingestion/hooks.ts b/plugin-server/src/worker/ingestion/hooks.ts index e3c15a9329135..d7f39b0a5b575 100644 --- a/plugin-server/src/worker/ingestion/hooks.ts +++ b/plugin-server/src/worker/ingestion/hooks.ts @@ -5,7 +5,7 @@ import { format } from 'util' import { Action, Hook, PostIngestionEvent, Team } from '../../types' import { PostgresRouter, PostgresUse } from '../../utils/db/postgres' -import fetch from '../../utils/fetch' +import { filteredFetch } from '../../utils/fetch' import { status } from '../../utils/status' import { getPropertyValueByPath, stringify } from '../../utils/utils' import { OrganizationManager } from './organization-manager' @@ -360,7 +360,7 @@ export class HookCommander { }, 5000) try { await instrumentWebhookStep('fetch', async () => { - const request = await fetch(webhookUrl, { + const request = await filteredFetch(webhookUrl, { method: 'POST', body: JSON.stringify(message, undefined, 4), headers: { 'Content-Type': 'application/json' }, @@ -400,7 +400,7 @@ export class HookCommander { ) }, 5000) try { - const request = await fetch(hook.target, { + const request = await filteredFetch(hook.target, { method: 'POST', body: JSON.stringify(payload, undefined, 4), headers: { 'Content-Type': 'application/json' }, diff --git a/plugin-server/src/worker/plugins/mmdb.ts b/plugin-server/src/worker/plugins/mmdb.ts index a825c931c4da2..7321238b2ba31 100644 --- a/plugin-server/src/worker/plugins/mmdb.ts +++ b/plugin-server/src/worker/plugins/mmdb.ts @@ -1,5 +1,6 @@ import { Reader, ReaderModel } from '@maxmind/geoip2-node' import { DateTime } from 'luxon' +import fetch from 'node-fetch' import * as schedule from 'node-schedule' import prettyBytes from 'pretty-bytes' import { brotliDecompress } from 'zlib' @@ -12,7 +13,6 @@ import { } from '../../config/mmdb-constants' import { Hub, PluginAttachmentDB } from '../../types' import { PostgresUse } from '../../utils/db/postgres' -import fetch from '../../utils/fetch' import { status } from '../../utils/status' import { delay } from '../../utils/utils' diff --git a/plugin-server/src/worker/vm/imports.ts b/plugin-server/src/worker/vm/imports.ts index bcb9648974934..b630ba10dca08 100644 --- a/plugin-server/src/worker/vm/imports.ts +++ b/plugin-server/src/worker/vm/imports.ts @@ -15,7 +15,7 @@ import { PassThrough } from 'stream' import * as url from 'url' import * as zlib from 'zlib' -import fetch from '../../utils/fetch' +import { filteredFetch } from '../../utils/fetch' import { writeToFile } from './extensions/test-utils' export const imports = { @@ -32,7 +32,7 @@ export const imports = { 'aws-sdk': AWS, ethers: ethers, 'generic-pool': genericPool, - 'node-fetch': fetch, + 'node-fetch': filteredFetch, 'snowflake-sdk': snowflake, crypto: crypto, jsonwebtoken: jsonwebtoken, diff --git a/plugin-server/tests/utils/fetch.test.ts b/plugin-server/tests/utils/fetch.test.ts new file mode 100644 index 0000000000000..2eeb333c82ee9 --- /dev/null +++ b/plugin-server/tests/utils/fetch.test.ts @@ -0,0 +1,23 @@ +import { filteredFetch, raiseIfUserProvidedUrlUnsafe } from '../../src/utils/fetch' + +const { FetchError } = filteredFetch + +test('raiseIfUserProvidedUrlUnsafe', async () => { + // Sync test cases with posthog/api/test/test_utils.py + await raiseIfUserProvidedUrlUnsafe('https://google.com?q=20') // Safe + await raiseIfUserProvidedUrlUnsafe('https://posthog.com') // Safe + await raiseIfUserProvidedUrlUnsafe('https://posthog.com/foo/bar') // Safe, with path + await raiseIfUserProvidedUrlUnsafe('https://posthog.com:443') // Safe, good port + await raiseIfUserProvidedUrlUnsafe('https://1.1.1.1') // Safe, public IP + await expect(() => raiseIfUserProvidedUrlUnsafe('https://posthog.com:80')).rejects.toBeInstanceOf(FetchError) // Bad port + await expect(raiseIfUserProvidedUrlUnsafe('ftp://posthog.com')).rejects.toBeInstanceOf(FetchError) // Bad scheme + await expect(raiseIfUserProvidedUrlUnsafe('')).rejects.toBeInstanceOf(FetchError) // Empty + await expect(raiseIfUserProvidedUrlUnsafe('@@@')).rejects.toBeInstanceOf(FetchError) // Invalid format + await expect(raiseIfUserProvidedUrlUnsafe('posthog.com')).rejects.toBeInstanceOf(FetchError) // No scheme + await expect(raiseIfUserProvidedUrlUnsafe('http://localhost')).rejects.toBeInstanceOf(FetchError) // Internal + await expect(raiseIfUserProvidedUrlUnsafe('http://192.168.0.5')).rejects.toBeInstanceOf(FetchError) // Internal + await expect(raiseIfUserProvidedUrlUnsafe('http://0.0.0.0')).rejects.toBeInstanceOf(FetchError) // Internal + await expect(raiseIfUserProvidedUrlUnsafe('http://10.0.0.24')).rejects.toBeInstanceOf(FetchError) // Internal + await expect(raiseIfUserProvidedUrlUnsafe('http://172.20.0.21')).rejects.toBeInstanceOf(FetchError) // Internal + await expect(raiseIfUserProvidedUrlUnsafe('http://fgtggggzzggggfd.com')).rejects.toBeInstanceOf(FetchError) // Non-existent +}) diff --git a/posthog/api/test/test_utils.py b/posthog/api/test/test_utils.py index c34aa06dac9a7..57b67991c3bd5 100644 --- a/posthog/api/test/test_utils.py +++ b/posthog/api/test/test_utils.py @@ -147,6 +147,7 @@ def test_safe_clickhouse_string_unicode_non_surrogates(self): self.assertEqual(safe_clickhouse_string("💜 \u1f49c\ 💜"), "💜 \u1f49c\ 💜") def test_raise_if_user_provided_url_unsafe(self): + # Sync test cases with plugin-server/src/utils/fetch.test.ts raise_if_user_provided_url_unsafe("https://google.com?q=20") # Safe raise_if_user_provided_url_unsafe("https://posthog.com") # Safe raise_if_user_provided_url_unsafe("https://posthog.com/foo/bar") # Safe, with path @@ -155,6 +156,7 @@ def test_raise_if_user_provided_url_unsafe(self): self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("https://posthog.com:80")) # Bad port self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("ftp://posthog.com")) # Bad scheme self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("")) # Empty + self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("@@@")) # Invalid format self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("posthog.com")) # No scheme self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("http://localhost")) # Internal self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("http://192.168.0.5")) # Internal diff --git a/posthog/api/utils.py b/posthog/api/utils.py index 2a9991ea37f4e..5bcfe2918dc0f 100644 --- a/posthog/api/utils.py +++ b/posthog/api/utils.py @@ -302,8 +302,14 @@ def parse_bool(value: Union[str, List[str]]) -> bool: def raise_if_user_provided_url_unsafe(url: str): - """Raise if the provided URL seems unsafe, otherwise do nothing.""" - parsed_url: urllib.parse.ParseResult = urllib.parse.urlparse(url) + """Raise if the provided URL seems unsafe, otherwise do nothing. + + Equivalent of plugin server raiseIfUserProvidedUrlUnsafe. + """ + try: + parsed_url: urllib.parse.ParseResult = urllib.parse.urlparse(url) + except ValueError: + raise ValueError("Invalid URL") if not parsed_url.hostname: raise ValueError("No hostname") if parsed_url.scheme == "http": From 6a4657ea639df862e1c3f2a07bb795ef9cddb1fd Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Thu, 24 Aug 2023 19:10:04 +0200 Subject: [PATCH 02/10] Only apply Python host check on Cloud --- posthog/api/user.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/posthog/api/user.py b/posthog/api/user.py index 0562943760244..4fbb85e8ca1a0 100644 --- a/posthog/api/user.py +++ b/posthog/api/user.py @@ -33,6 +33,7 @@ from posthog.api.shared import OrganizationBasicSerializer, TeamBasicSerializer from posthog.api.utils import raise_if_user_provided_url_unsafe from posthog.auth import authenticate_secondarily +from posthog.cloud_utils import is_cloud from posthog.email import is_email_available from posthog.event_usage import report_user_logged_in, report_user_updated, report_user_verified_email from posthog.models import Team, User, UserScenePersonalisation, Dashboard @@ -450,7 +451,8 @@ def test_slack_webhook(request): return JsonResponse({"error": "no webhook URL"}) message = {"text": "_Greetings_ from PostHog!"} try: - raise_if_user_provided_url_unsafe(webhook) + if is_cloud(): # Protect against SSRF + raise_if_user_provided_url_unsafe(webhook) response = requests.post(webhook, verify=False, json=message) if response.ok: From 74710979e1997e4b14a21b6131aa0f129693d0c7 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Thu, 24 Aug 2023 19:30:48 +0200 Subject: [PATCH 03/10] Update tests to use valid hook URLs --- plugin-server/tests/main/db.test.ts | 8 ++++---- .../event-pipeline/event-pipeline-integration.test.ts | 4 ++-- plugin-server/tests/worker/ingestion/hooks.test.ts | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/plugin-server/tests/main/db.test.ts b/plugin-server/tests/main/db.test.ts index 06c23380448e3..a2a570ce0af07 100644 --- a/plugin-server/tests/main/db.test.ts +++ b/plugin-server/tests/main/db.test.ts @@ -165,7 +165,7 @@ describe('DB', () => { user_id: 1001, resource_id: 69, event: 'action_performed', - target: 'https://rest-hooks.example.com/', + target: 'https://example.com/', created: new Date().toISOString(), updated: new Date().toISOString(), }) @@ -188,7 +188,7 @@ describe('DB', () => { team_id: 2, resource_id: 69, event: 'action_performed', - target: 'https://rest-hooks.example.com/', + target: 'https://example.com/', }, ], bytecode: null, @@ -226,7 +226,7 @@ describe('DB', () => { user_id: 1001, resource_id: 69, event: 'event_performed', - target: 'https://rest-hooks.example.com/', + target: 'https://example.com/', created: new Date().toISOString(), updated: new Date().toISOString(), }) @@ -236,7 +236,7 @@ describe('DB', () => { user_id: 1001, resource_id: 70, event: 'event_performed', - target: 'https://rest-hooks.example.com/', + target: 'https://example.com/', created: new Date().toISOString(), updated: new Date().toISOString(), }) diff --git a/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts b/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts index 837079da765eb..ddc4cb96af4bb 100644 --- a/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts +++ b/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts @@ -174,7 +174,7 @@ describe('Event Pipeline integration test', () => { user_id: commonUserId, resource_id: 69, event: 'action_performed', - target: 'https://rest-hooks.example.com/', + target: 'https://example.com/', created: timestamp, updated: timestamp, } as Hook) @@ -198,7 +198,7 @@ describe('Event Pipeline integration test', () => { hook: { id: 'abc', event: 'action_performed', - target: 'https://rest-hooks.example.com/', + target: 'https://example.com/', }, data: { event: 'xyz', diff --git a/plugin-server/tests/worker/ingestion/hooks.test.ts b/plugin-server/tests/worker/ingestion/hooks.test.ts index c319ba01c3bb9..25106ca60a3f2 100644 --- a/plugin-server/tests/worker/ingestion/hooks.test.ts +++ b/plugin-server/tests/worker/ingestion/hooks.test.ts @@ -478,7 +478,7 @@ describe('hooks', () => { user_id: 1, resource_id: 1, event: 'foo', - target: 'foo.bar', + target: 'https://example.com/', created: new Date().toISOString(), updated: new Date().toISOString(), } @@ -493,7 +493,7 @@ describe('hooks', () => { hook: { id: 'id', event: 'foo', - target: 'foo.bar', + target: 'https://example.com/', }, data: { event: 'foo', @@ -525,7 +525,7 @@ describe('hooks', () => { hook: { id: 'id', event: 'foo', - target: 'foo.bar', + target: 'https://example.com/', }, data: { event: 'foo', From b80ddc605e45b854ee32c957317f42cc10d94454 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Thu, 24 Aug 2023 19:31:07 +0200 Subject: [PATCH 04/10] Only apply plugin server host check in prod --- plugin-server/src/utils/fetch.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/plugin-server/src/utils/fetch.ts b/plugin-server/src/utils/fetch.ts index ce0f0c7b5529a..c1cf5c7da2583 100644 --- a/plugin-server/src/utils/fetch.ts +++ b/plugin-server/src/utils/fetch.ts @@ -7,6 +7,7 @@ import fetch, { FetchError, Request, Response } from 'node-fetch' import { URL } from 'url' import { runInSpan } from '../sentry' +import { isProdEnv } from './env-utils' export function filteredFetch(...args: Parameters): Promise { const request = new Request(...args) @@ -16,7 +17,10 @@ export function filteredFetch(...args: Parameters): Promise { - await raiseIfUserProvidedUrlUnsafe(request.url) + if (isProdEnv()) { + // TODO: Only run this check on Cloud + await raiseIfUserProvidedUrlUnsafe(request.url) + } return await fetch(...args) } ) From 42ab93cf470c74c0492ac71f7b221a4cd229056a Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 25 Aug 2023 10:52:47 +0200 Subject: [PATCH 05/10] Update URLs in a couple more tests --- .../event-pipeline/event-pipeline-integration.test.ts | 2 +- plugin-server/tests/worker/ingestion/hooks.test.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts b/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts index ddc4cb96af4bb..ac456a076276a 100644 --- a/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts +++ b/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts @@ -222,7 +222,7 @@ describe('Event Pipeline integration test', () => { // Using a more verbose way instead of toHaveBeenCalledWith because we need to parse request body // and use expect.any for a few payload properties, which wouldn't be possible in a simpler way - expect(jest.mocked(fetch).mock.calls[0][0]).toBe('https://rest-hooks.example.com/') + expect(jest.mocked(fetch).mock.calls[0][0]).toBe('https://example.com/') const secondArg = jest.mocked(fetch).mock.calls[0][1] expect(JSON.parse(secondArg!.body as unknown as string)).toStrictEqual(expectedPayload) expect(JSON.parse(secondArg!.body as unknown as string)).toStrictEqual(expectedPayload) diff --git a/plugin-server/tests/worker/ingestion/hooks.test.ts b/plugin-server/tests/worker/ingestion/hooks.test.ts index 25106ca60a3f2..ef333092c96bb 100644 --- a/plugin-server/tests/worker/ingestion/hooks.test.ts +++ b/plugin-server/tests/worker/ingestion/hooks.test.ts @@ -487,7 +487,7 @@ describe('hooks', () => { test('person = undefined', async () => { await hookCommander.postRestHook(hook, { event: 'foo' } as any) - expect(fetch).toHaveBeenCalledWith('foo.bar', { + expect(fetch).toHaveBeenCalledWith('https://example.com/', { body: JSON.stringify( { hook: { @@ -519,7 +519,7 @@ describe('hooks', () => { person_properties: { foo: 'bar' }, person_created_at: DateTime.fromISO(now).toUTC(), } as any) - expect(fetch).toHaveBeenCalledWith('foo.bar', { + expect(fetch).toHaveBeenCalledWith('https://example.com/', { body: JSON.stringify( { hook: { From eb321fc0e0b9c493e0e858365f26a3d4ff0b0e37 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 25 Aug 2023 13:05:34 +0200 Subject: [PATCH 06/10] Only check hostnames on Cloud and remove port check --- plugin-server/src/config/config.ts | 2 +- plugin-server/src/types.ts | 3 ++- plugin-server/src/utils/env-utils.ts | 2 ++ posthog/api/test/test_utils.py | 37 +++++++++++++++++++--------- posthog/api/utils.py | 17 +++---------- 5 files changed, 34 insertions(+), 27 deletions(-) diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index 39937716987d6..1fcdddc774352 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -123,7 +123,7 @@ export function getDefaultConfig(): PluginsServerConfig { APP_METRICS_GATHERED_FOR_ALL: isDevEnv() ? true : false, MAX_TEAM_ID_TO_BUFFER_ANONYMOUS_EVENTS_FOR: 0, USE_KAFKA_FOR_SCHEDULED_TASKS: true, - CLOUD_DEPLOYMENT: 'default', // Used as a Sentry tag + CLOUD_DEPLOYMENT: null, SESSION_RECORDING_KAFKA_HOSTS: undefined, SESSION_RECORDING_KAFKA_SECURITY_PROTOCOL: undefined, diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index fd497888ca501..82c67d8b1b2ac 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -198,7 +198,8 @@ export interface PluginsServerConfig { USE_KAFKA_FOR_SCHEDULED_TASKS: boolean // distribute scheduled tasks across the scheduler workers EVENT_OVERFLOW_BUCKET_CAPACITY: number EVENT_OVERFLOW_BUCKET_REPLENISH_RATE: number - CLOUD_DEPLOYMENT: string + /** Label of the PostHog Cloud environment. Null if not running PostHog Cloud. @example 'US' */ + CLOUD_DEPLOYMENT: string | null // local directory might be a volume mount or a directory on disk (e.g. in local dev) SESSION_RECORDING_LOCAL_DIRECTORY: string diff --git a/plugin-server/src/utils/env-utils.ts b/plugin-server/src/utils/env-utils.ts index 0b343f09fc8e7..4c2ab7d173183 100644 --- a/plugin-server/src/utils/env-utils.ts +++ b/plugin-server/src/utils/env-utils.ts @@ -40,6 +40,8 @@ export const isTestEnv = (): boolean => determineNodeEnv() === NodeEnv.Test export const isDevEnv = (): boolean => determineNodeEnv() === NodeEnv.Development export const isProdEnv = (): boolean => determineNodeEnv() === NodeEnv.Production +export const isCloud = (): boolean => !!process.env.CLOUD_DEPLOYMENT + export function isIngestionOverflowEnabled(): boolean { const ingestionOverflowEnabled = process.env.INGESTION_OVERFLOW_ENABLED return stringToBoolean(ingestionOverflowEnabled) diff --git a/posthog/api/test/test_utils.py b/posthog/api/test/test_utils.py index 57b67991c3bd5..84a3d7315c220 100644 --- a/posthog/api/test/test_utils.py +++ b/posthog/api/test/test_utils.py @@ -153,16 +153,29 @@ def test_raise_if_user_provided_url_unsafe(self): raise_if_user_provided_url_unsafe("https://posthog.com/foo/bar") # Safe, with path raise_if_user_provided_url_unsafe("https://posthog.com:443") # Safe, good port raise_if_user_provided_url_unsafe("https://1.1.1.1") # Safe, public IP - self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("https://posthog.com:80")) # Bad port - self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("ftp://posthog.com")) # Bad scheme - self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("")) # Empty - self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("@@@")) # Invalid format - self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("posthog.com")) # No scheme - self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("http://localhost")) # Internal - self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("http://192.168.0.5")) # Internal - self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("http://0.0.0.0")) # Internal - self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("http://10.0.0.24")) # Internal - self.assertRaises(ValueError, lambda: raise_if_user_provided_url_unsafe("http://172.20.0.21")) # Internal - self.assertRaises( - ValueError, lambda: raise_if_user_provided_url_unsafe("http://fgtggggzzggggfd.com") + self.assertRaisesMessage(ValueError, "No hostname", lambda: raise_if_user_provided_url_unsafe("")) + self.assertRaisesMessage(ValueError, "No hostname", lambda: raise_if_user_provided_url_unsafe("@@@")) + self.assertRaisesMessage(ValueError, "No hostname", lambda: raise_if_user_provided_url_unsafe("posthog.com")) + self.assertRaisesMessage( + ValueError, + "Scheme must be either HTTP or HTTPS", + lambda: raise_if_user_provided_url_unsafe("ftp://posthog.com"), + ) + self.assertRaisesMessage( + ValueError, "Internal hostname", lambda: raise_if_user_provided_url_unsafe("http://localhost") + ) + self.assertRaisesMessage( + ValueError, "Internal hostname", lambda: raise_if_user_provided_url_unsafe("http://192.168.0.5") + ) + self.assertRaisesMessage( + ValueError, "Internal hostname", lambda: raise_if_user_provided_url_unsafe("http://0.0.0.0") + ) + self.assertRaisesMessage( + ValueError, "Internal hostname", lambda: raise_if_user_provided_url_unsafe("http://10.0.0.24") + ) + self.assertRaisesMessage( + ValueError, "Internal hostname", lambda: raise_if_user_provided_url_unsafe("http://172.20.0.21") + ) + self.assertRaisesMessage( + ValueError, "Invalid hostname", lambda: raise_if_user_provided_url_unsafe("http://fgtggggzzggggfd.com") ) # Non-existent diff --git a/posthog/api/utils.py b/posthog/api/utils.py index 5bcfe2918dc0f..298908f1ccbe1 100644 --- a/posthog/api/utils.py +++ b/posthog/api/utils.py @@ -306,25 +306,16 @@ def raise_if_user_provided_url_unsafe(url: str): Equivalent of plugin server raiseIfUserProvidedUrlUnsafe. """ - try: - parsed_url: urllib.parse.ParseResult = urllib.parse.urlparse(url) - except ValueError: - raise ValueError("Invalid URL") + parsed_url: urllib.parse.ParseResult = urllib.parse.urlparse(url) # urlparse never raises errors if not parsed_url.hostname: raise ValueError("No hostname") - if parsed_url.scheme == "http": - port = 80 - elif parsed_url.scheme == "https": - port = 443 - else: + if parsed_url.scheme not in ("http", "https"): raise ValueError("Scheme must be either HTTP or HTTPS") - if parsed_url.port is not None and parsed_url.port != port: - raise ValueError("Port does not match scheme") # Disallow if hostname resolves to a private (internal) IP address try: - addrinfo = socket.getaddrinfo(parsed_url.hostname, port) + addrinfo = socket.getaddrinfo(parsed_url.hostname, None) except socket.gaierror: raise ValueError("Invalid hostname") for _, _, _, _, sockaddr in addrinfo: if ip_address(sockaddr[0]).is_private: # Prevent addressing internal services - raise ValueError("Invalid hostname") + raise ValueError("Internal hostname") From 8f9883a27741bf57985aab72aee87fdf14d92efd Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 25 Aug 2023 13:07:18 +0200 Subject: [PATCH 07/10] Fix fetch mocking --- plugin-server/jest.setup.fetch-mock.js | 8 +++- plugin-server/src/utils/fetch.ts | 20 +++------- plugin-server/tests/utils/fetch.test.ts | 39 +++++++++++++------ .../tests/worker/ingestion/hooks.test.ts | 22 ++++++++++- .../tests/worker/plugins/mmdb.test.ts | 2 +- .../tests/worker/vm.extra-lazy.test.ts | 2 +- plugin-server/tests/worker/vm.test.ts | 2 +- 7 files changed, 64 insertions(+), 31 deletions(-) diff --git a/plugin-server/jest.setup.fetch-mock.js b/plugin-server/jest.setup.fetch-mock.js index 151debe7538c5..60e240a898220 100644 --- a/plugin-server/jest.setup.fetch-mock.js +++ b/plugin-server/jest.setup.fetch-mock.js @@ -6,7 +6,11 @@ import fetch from 'node-fetch' import { status } from './src/utils/status' -jest.mock('node-fetch') +jest.mock('node-fetch', () => ({ + __esModule: true, + ...jest.requireActual('node-fetch'), // Only mock fetch(), leave Request, Response, FetchError, etc. alone + default: jest.fn(), +})) beforeEach(() => { const responsesToUrls = { @@ -21,7 +25,7 @@ beforeEach(() => { ]), } - fetch.mockImplementation( + jest.mocked(fetch).mockImplementation( (url, options) => new Promise((resolve) => resolve({ diff --git a/plugin-server/src/utils/fetch.ts b/plugin-server/src/utils/fetch.ts index c1cf5c7da2583..75f425189ff58 100644 --- a/plugin-server/src/utils/fetch.ts +++ b/plugin-server/src/utils/fetch.ts @@ -7,7 +7,7 @@ import fetch, { FetchError, Request, Response } from 'node-fetch' import { URL } from 'url' import { runInSpan } from '../sentry' -import { isProdEnv } from './env-utils' +import { isCloud } from './env-utils' export function filteredFetch(...args: Parameters): Promise { const request = new Request(...args) @@ -17,8 +17,8 @@ export function filteredFetch(...args: Parameters): Promise { - if (isProdEnv()) { - // TODO: Only run this check on Cloud + if (isCloud()) { + console.log(args, request.url, request.method) await raiseIfUserProvidedUrlUnsafe(request.url) } return await fetch(...args) @@ -45,16 +45,8 @@ export async function raiseIfUserProvidedUrlUnsafe(url: string): Promise { if (!parsedUrl.hostname) { throw new FetchError('No hostname', 'posthog-host-guard') } - let port: URL['port'] - if (parsedUrl.protocol === 'http:') { - port = '80' - } else if (parsedUrl.protocol === 'https:') { - port = '443' - } else { - throw new FetchError('Protocol must be either HTTP or HTTPS', 'posthog-host-guard') - } - if (parsedUrl.port.length > 0 && parsedUrl.port !== port) { - throw new FetchError('Port does not match protocol', 'posthog-host-guard') + if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') { + throw new FetchError('Scheme must be either HTTP or HTTPS', 'posthog-host-guard') } let addrinfo: LookupAddress[] try { @@ -65,7 +57,7 @@ export async function raiseIfUserProvidedUrlUnsafe(url: string): Promise { for (const { address } of addrinfo) { // Prevent addressing internal services if (ipaddr.parse(address).range() !== 'unicast') { - throw new FetchError('Invalid hostname', 'posthog-host-guard') + throw new FetchError('Internal hostname', 'posthog-host-guard') } } } diff --git a/plugin-server/tests/utils/fetch.test.ts b/plugin-server/tests/utils/fetch.test.ts index 2eeb333c82ee9..805d2dda8e778 100644 --- a/plugin-server/tests/utils/fetch.test.ts +++ b/plugin-server/tests/utils/fetch.test.ts @@ -9,15 +9,32 @@ test('raiseIfUserProvidedUrlUnsafe', async () => { await raiseIfUserProvidedUrlUnsafe('https://posthog.com/foo/bar') // Safe, with path await raiseIfUserProvidedUrlUnsafe('https://posthog.com:443') // Safe, good port await raiseIfUserProvidedUrlUnsafe('https://1.1.1.1') // Safe, public IP - await expect(() => raiseIfUserProvidedUrlUnsafe('https://posthog.com:80')).rejects.toBeInstanceOf(FetchError) // Bad port - await expect(raiseIfUserProvidedUrlUnsafe('ftp://posthog.com')).rejects.toBeInstanceOf(FetchError) // Bad scheme - await expect(raiseIfUserProvidedUrlUnsafe('')).rejects.toBeInstanceOf(FetchError) // Empty - await expect(raiseIfUserProvidedUrlUnsafe('@@@')).rejects.toBeInstanceOf(FetchError) // Invalid format - await expect(raiseIfUserProvidedUrlUnsafe('posthog.com')).rejects.toBeInstanceOf(FetchError) // No scheme - await expect(raiseIfUserProvidedUrlUnsafe('http://localhost')).rejects.toBeInstanceOf(FetchError) // Internal - await expect(raiseIfUserProvidedUrlUnsafe('http://192.168.0.5')).rejects.toBeInstanceOf(FetchError) // Internal - await expect(raiseIfUserProvidedUrlUnsafe('http://0.0.0.0')).rejects.toBeInstanceOf(FetchError) // Internal - await expect(raiseIfUserProvidedUrlUnsafe('http://10.0.0.24')).rejects.toBeInstanceOf(FetchError) // Internal - await expect(raiseIfUserProvidedUrlUnsafe('http://172.20.0.21')).rejects.toBeInstanceOf(FetchError) // Internal - await expect(raiseIfUserProvidedUrlUnsafe('http://fgtggggzzggggfd.com')).rejects.toBeInstanceOf(FetchError) // Non-existent + await expect(raiseIfUserProvidedUrlUnsafe('')).rejects.toThrow(new FetchError('Invalid URL', 'posthog-host-guard')) + await expect(raiseIfUserProvidedUrlUnsafe('@@@')).rejects.toThrow( + new FetchError('Invalid URL', 'posthog-host-guard') + ) + await expect(raiseIfUserProvidedUrlUnsafe('posthog.com')).rejects.toThrow( + new FetchError('Invalid URL', 'posthog-host-guard') + ) + await expect(raiseIfUserProvidedUrlUnsafe('ftp://posthog.com')).rejects.toThrow( + new FetchError('Scheme must be either HTTP or HTTPS', 'posthog-host-guard') + ) + await expect(raiseIfUserProvidedUrlUnsafe('http://localhost')).rejects.toThrow( + new FetchError('Internal hostname', 'posthog-host-guard') + ) + await expect(raiseIfUserProvidedUrlUnsafe('http://192.168.0.5')).rejects.toThrow( + new FetchError('Internal hostname', 'posthog-host-guard') + ) + await expect(raiseIfUserProvidedUrlUnsafe('http://0.0.0.0')).rejects.toThrow( + new FetchError('Internal hostname', 'posthog-host-guard') + ) + await expect(raiseIfUserProvidedUrlUnsafe('http://10.0.0.24')).rejects.toThrow( + new FetchError('Internal hostname', 'posthog-host-guard') + ) + await expect(raiseIfUserProvidedUrlUnsafe('http://172.20.0.21')).rejects.toThrow( + new FetchError('Internal hostname', 'posthog-host-guard') + ) + await expect(raiseIfUserProvidedUrlUnsafe('http://fgtggggzzggggfd.com')).rejects.toThrow( + new FetchError('Invalid hostname', 'posthog-host-guard') + ) }) diff --git a/plugin-server/tests/worker/ingestion/hooks.test.ts b/plugin-server/tests/worker/ingestion/hooks.test.ts index ef333092c96bb..83c7722587cec 100644 --- a/plugin-server/tests/worker/ingestion/hooks.test.ts +++ b/plugin-server/tests/worker/ingestion/hooks.test.ts @@ -1,7 +1,8 @@ import { DateTime } from 'luxon' -import * as fetch from 'node-fetch' +import fetch, { FetchError } from 'node-fetch' import { Action, PostIngestionEvent, Team } from '../../../src/types' +import { isCloud } from '../../../src/utils/env-utils' import { UUIDT } from '../../../src/utils/utils' import { determineWebhookType, @@ -15,6 +16,8 @@ import { } from '../../../src/worker/ingestion/hooks' import { Hook } from './../../../src/types' +jest.mock('../../../src/utils/env-utils') + describe('hooks', () => { describe('determineWebhookType', () => { test('Slack', () => { @@ -471,6 +474,7 @@ describe('hooks', () => { let hook: Hook beforeEach(() => { + jest.mocked(isCloud).mockReturnValue(false) // Disable private IP guard hookCommander = new HookCommander({} as any, {} as any, {} as any) hook = { id: 'id', @@ -510,6 +514,8 @@ describe('hooks', () => { }) test('person data from the event', async () => { + jest.mocked(isCloud).mockReturnValue(true) // Enable private IP guard, which example.com should pass + const now = new Date().toISOString() const uuid = new UUIDT().toString() await hookCommander.postRestHook(hook, { @@ -545,5 +551,19 @@ describe('hooks', () => { timeout: 10000, }) }) + + test('private IP hook allowed on self-hosted', async () => { + await hookCommander.postRestHook({ ...hook, target: 'http://127.0.0.1' }, { event: 'foo' } as any) + + expect(fetch).toHaveBeenCalledWith('http://127.0.0.1', expect.anything()) + }) + + test('private IP hook forbidden on Cloud', async () => { + jest.mocked(isCloud).mockReturnValue(true) + + await expect( + hookCommander.postRestHook({ ...hook, target: 'http://127.0.0.1' }, { event: 'foo' } as any) + ).rejects.toThrow(new FetchError('Internal hostname', 'posthog-host-guard')) + }) }) }) diff --git a/plugin-server/tests/worker/plugins/mmdb.test.ts b/plugin-server/tests/worker/plugins/mmdb.test.ts index 9bd3769032bd6..8179191a27640 100644 --- a/plugin-server/tests/worker/plugins/mmdb.test.ts +++ b/plugin-server/tests/worker/plugins/mmdb.test.ts @@ -1,7 +1,7 @@ import { ReaderModel } from '@maxmind/geoip2-node' import { readFileSync } from 'fs' import { DateTime } from 'luxon' -import * as fetch from 'node-fetch' +import fetch from 'node-fetch' import { join } from 'path' import { Hub, LogLevel } from '../../../src/types' diff --git a/plugin-server/tests/worker/vm.extra-lazy.test.ts b/plugin-server/tests/worker/vm.extra-lazy.test.ts index 6f971c2e38d36..3c66d319da44d 100644 --- a/plugin-server/tests/worker/vm.extra-lazy.test.ts +++ b/plugin-server/tests/worker/vm.extra-lazy.test.ts @@ -1,4 +1,4 @@ -import * as fetch from 'node-fetch' +import fetch from 'node-fetch' import { Hub, PluginTaskType } from '../../src/types' import { createHub } from '../../src/utils/db/hub' diff --git a/plugin-server/tests/worker/vm.test.ts b/plugin-server/tests/worker/vm.test.ts index 8496a94a5a2c7..5351d9eb6b55a 100644 --- a/plugin-server/tests/worker/vm.test.ts +++ b/plugin-server/tests/worker/vm.test.ts @@ -1,5 +1,5 @@ import { PluginEvent, ProcessedPluginEvent } from '@posthog/plugin-scaffold' -import * as fetch from 'node-fetch' +import fetch from 'node-fetch' import { KAFKA_EVENTS_PLUGIN_INGESTION, KAFKA_PLUGIN_LOG_ENTRIES } from '../../src/config/kafka-topics' import { Hub, PluginLogEntrySource, PluginLogEntryType } from '../../src/types' From 7dc3bab74e225cd57b84cc1e0a5082762fe5bded Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 25 Aug 2023 14:30:00 +0200 Subject: [PATCH 08/10] Roll out hostname guard per project --- plugin-server/src/config/config.ts | 1 + .../on-event-handler-consumer.ts | 8 ++- plugin-server/src/types.ts | 2 + plugin-server/src/utils/db/hub.ts | 4 ++ plugin-server/src/utils/fetch.ts | 34 +++++++----- plugin-server/src/worker/ingestion/hooks.ts | 13 +++-- plugin-server/src/worker/vm/imports.ts | 52 ++++++++++--------- plugin-server/src/worker/vm/vm.ts | 4 +- .../main/ingestion-queues/each-batch.test.ts | 3 +- plugin-server/tests/utils/fetch.test.ts | 4 +- .../event-pipeline-integration.test.ts | 2 +- .../tests/worker/ingestion/hooks.test.ts | 13 +++-- 12 files changed, 91 insertions(+), 49 deletions(-) diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index 1fcdddc774352..4344dc76985cf 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -106,6 +106,7 @@ export function getDefaultConfig(): PluginsServerConfig { CONVERSION_BUFFER_ENABLED_TEAMS: '', CONVERSION_BUFFER_TOPIC_ENABLED_TEAMS: '', BUFFER_CONVERSION_SECONDS: isDevEnv() ? 2 : 60, // KEEP IN SYNC WITH posthog/settings/ingestion.py + FETCH_HOSTNAME_GUARD_TEAMS: '', PERSON_INFO_CACHE_TTL: 5 * 60, // 5 min KAFKA_HEALTHCHECK_SECONDS: 20, OBJECT_STORAGE_ENABLED: true, diff --git a/plugin-server/src/main/ingestion-queues/on-event-handler-consumer.ts b/plugin-server/src/main/ingestion-queues/on-event-handler-consumer.ts index 221c33b1381e4..31a0e425a40b3 100644 --- a/plugin-server/src/main/ingestion-queues/on-event-handler-consumer.ts +++ b/plugin-server/src/main/ingestion-queues/on-event-handler-consumer.ts @@ -85,7 +85,13 @@ export const startAsyncWebhooksHandlerConsumer = async ({ const actionManager = new ActionManager(postgres) await actionManager.prepare() const actionMatcher = new ActionMatcher(postgres, actionManager, statsd) - const hookCannon = new HookCommander(postgres, teamManager, organizationManager, statsd) + const hookCannon = new HookCommander( + postgres, + teamManager, + organizationManager, + new Set(serverConfig.FETCH_HOSTNAME_GUARD_TEAMS.split(',').filter(String).map(Number)), + statsd + ) const concurrency = serverConfig.TASKS_PER_WORKER || 20 const pubSub = new PubSub(serverConfig, { diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index 82c67d8b1b2ac..0ab5f4eafd985 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -179,6 +179,7 @@ export interface PluginsServerConfig { CONVERSION_BUFFER_ENABLED_TEAMS: string CONVERSION_BUFFER_TOPIC_ENABLED_TEAMS: string BUFFER_CONVERSION_SECONDS: number + FETCH_HOSTNAME_GUARD_TEAMS: string PERSON_INFO_CACHE_TTL: number KAFKA_HEALTHCHECK_SECONDS: number OBJECT_STORAGE_ENABLED: boolean // Disables or enables the use of object storage. It will become mandatory to use object storage @@ -257,6 +258,7 @@ export interface Hub extends PluginsServerConfig { lastActivityType: string statelessVms: StatelessVmMap conversionBufferEnabledTeams: Set + fetchHostnameGuardTeams: Set // functions enqueuePluginJob: (job: EnqueuedPluginJob) => Promise } diff --git a/plugin-server/src/utils/db/hub.ts b/plugin-server/src/utils/db/hub.ts index 9b38299470ff4..7d229bce465c3 100644 --- a/plugin-server/src/utils/db/hub.ts +++ b/plugin-server/src/utils/db/hub.ts @@ -70,6 +70,9 @@ export async function createHub( const conversionBufferEnabledTeams = new Set( serverConfig.CONVERSION_BUFFER_ENABLED_TEAMS.split(',').filter(String).map(Number) ) + const fetchHostnameGuardTeams = new Set( + serverConfig.FETCH_HOSTNAME_GUARD_TEAMS.split(',').filter(String).map(Number) + ) const statsd: StatsD | undefined = createStatsdClient(serverConfig, threadId) @@ -182,6 +185,7 @@ export async function createHub( rootAccessManager, promiseManager, conversionBufferEnabledTeams, + fetchHostnameGuardTeams, } // :TODO: This is only used on worker threads, not main diff --git a/plugin-server/src/utils/fetch.ts b/plugin-server/src/utils/fetch.ts index 75f425189ff58..309a4c04d5cda 100644 --- a/plugin-server/src/utils/fetch.ts +++ b/plugin-server/src/utils/fetch.ts @@ -3,31 +3,41 @@ import { LookupAddress } from 'dns' import dns from 'dns/promises' import * as ipaddr from 'ipaddr.js' -import fetch, { FetchError, Request, Response } from 'node-fetch' +import fetch, { type RequestInfo, type RequestInit, type Response, FetchError, Request } from 'node-fetch' import { URL } from 'url' import { runInSpan } from '../sentry' -import { isCloud } from './env-utils' -export function filteredFetch(...args: Parameters): Promise { - const request = new Request(...args) - return runInSpan( +export async function trackedFetch(url: RequestInfo, init: RequestInit | undefined): Promise { + const request = new Request(url, init) + return await runInSpan( + { + op: 'fetch', + description: `${request.method} ${request.url}`, + }, + async () => await fetch(url, init) + ) +} + +trackedFetch.isRedirect = fetch.isRedirect +trackedFetch.FetchError = FetchError + +export async function safeTrackedFetch(url: RequestInfo, init: RequestInit | undefined): Promise { + const request = new Request(url, init) + return await runInSpan( { op: 'fetch', description: `${request.method} ${request.url}`, }, async () => { - if (isCloud()) { - console.log(args, request.url, request.method) - await raiseIfUserProvidedUrlUnsafe(request.url) - } - return await fetch(...args) + await raiseIfUserProvidedUrlUnsafe(request.url) + return await fetch(url, init) } ) } -filteredFetch.isRedirect = fetch.isRedirect -filteredFetch.FetchError = FetchError +safeTrackedFetch.isRedirect = fetch.isRedirect +safeTrackedFetch.FetchError = FetchError /** * Raise if the provided URL seems unsafe, otherwise do nothing. diff --git a/plugin-server/src/worker/ingestion/hooks.ts b/plugin-server/src/worker/ingestion/hooks.ts index d7f39b0a5b575..78ac02daa7fcc 100644 --- a/plugin-server/src/worker/ingestion/hooks.ts +++ b/plugin-server/src/worker/ingestion/hooks.ts @@ -5,7 +5,8 @@ import { format } from 'util' import { Action, Hook, PostIngestionEvent, Team } from '../../types' import { PostgresRouter, PostgresUse } from '../../utils/db/postgres' -import { filteredFetch } from '../../utils/fetch' +import { isCloud } from '../../utils/env-utils' +import { safeTrackedFetch, trackedFetch } from '../../utils/fetch' import { status } from '../../utils/status' import { getPropertyValueByPath, stringify } from '../../utils/utils' import { OrganizationManager } from './organization-manager' @@ -256,6 +257,7 @@ export class HookCommander { organizationManager: OrganizationManager statsd: StatsD | undefined siteUrl: string + fetchHostnameGuardTeams: Set /** Hook request timeout in ms. */ EXTERNAL_REQUEST_TIMEOUT = 10 * 1000 @@ -264,11 +266,13 @@ export class HookCommander { postgres: PostgresRouter, teamManager: TeamManager, organizationManager: OrganizationManager, + fetchHostnameGuardTeams: Set, statsd?: StatsD ) { this.postgres = postgres this.teamManager = teamManager this.organizationManager = organizationManager + this.fetchHostnameGuardTeams = fetchHostnameGuardTeams if (process.env.SITE_URL) { this.siteUrl = process.env.SITE_URL } else { @@ -358,9 +362,10 @@ export class HookCommander { `⌛⌛⌛ Posting Webhook slow. Timeout warning after 5 sec! url=${webhookUrl} team_id=${team.id} event_id=${event.eventUuid}` ) }, 5000) + const relevantFetch = isCloud() && this.fetchHostnameGuardTeams.has(team.id) ? safeTrackedFetch : trackedFetch try { await instrumentWebhookStep('fetch', async () => { - const request = await filteredFetch(webhookUrl, { + const request = await relevantFetch(webhookUrl, { method: 'POST', body: JSON.stringify(message, undefined, 4), headers: { 'Content-Type': 'application/json' }, @@ -399,8 +404,10 @@ export class HookCommander { `⌛⌛⌛ Posting RestHook slow. Timeout warning after 5 sec! url=${hook.target} team_id=${event.teamId} event_id=${event.eventUuid}` ) }, 5000) + const relevantFetch = + isCloud() && this.fetchHostnameGuardTeams.has(hook.team_id) ? safeTrackedFetch : trackedFetch try { - const request = await filteredFetch(hook.target, { + const request = await relevantFetch(hook.target, { method: 'POST', body: JSON.stringify(payload, undefined, 4), headers: { 'Content-Type': 'application/json' }, diff --git a/plugin-server/src/worker/vm/imports.ts b/plugin-server/src/worker/vm/imports.ts index b630ba10dca08..d7b02d87c1c41 100644 --- a/plugin-server/src/worker/vm/imports.ts +++ b/plugin-server/src/worker/vm/imports.ts @@ -12,33 +12,37 @@ import * as jsonwebtoken from 'jsonwebtoken' import * as pg from 'pg' import snowflake from 'snowflake-sdk' import { PassThrough } from 'stream' +import { Hub } from 'types' import * as url from 'url' import * as zlib from 'zlib' -import { filteredFetch } from '../../utils/fetch' +import { isCloud, isTestEnv } from '../../utils/env-utils' +import { safeTrackedFetch, trackedFetch } from '../../utils/fetch' import { writeToFile } from './extensions/test-utils' -export const imports = { - ...(process.env.NODE_ENV === 'test' - ? { - 'test-utils/write-to-file': writeToFile, - } - : {}), - '@google-cloud/bigquery': bigquery, - '@google-cloud/pubsub': pubsub, - '@google-cloud/storage': gcs, - '@posthog/plugin-contrib': contrib, - '@posthog/plugin-scaffold': scaffold, - 'aws-sdk': AWS, - ethers: ethers, - 'generic-pool': genericPool, - 'node-fetch': filteredFetch, - 'snowflake-sdk': snowflake, - crypto: crypto, - jsonwebtoken: jsonwebtoken, - faker: faker, - pg: pg, - stream: { PassThrough }, - url: url, - zlib: zlib, +export function determineImports(hub: Hub, teamId: number) { + return { + ...(isTestEnv() + ? { + 'test-utils/write-to-file': writeToFile, + } + : {}), + '@google-cloud/bigquery': bigquery, + '@google-cloud/pubsub': pubsub, + '@google-cloud/storage': gcs, + '@posthog/plugin-contrib': contrib, + '@posthog/plugin-scaffold': scaffold, + 'aws-sdk': AWS, + ethers: ethers, + 'generic-pool': genericPool, + 'node-fetch': isCloud() && hub.fetchHostnameGuardTeams.has(teamId) ? safeTrackedFetch : trackedFetch, + 'snowflake-sdk': snowflake, + crypto: crypto, + jsonwebtoken: jsonwebtoken, + faker: faker, + pg: pg, + stream: { PassThrough }, + url: url, + zlib: zlib, + } } diff --git a/plugin-server/src/worker/vm/vm.ts b/plugin-server/src/worker/vm/vm.ts index 967701cdff887..95e40ca4a6da8 100644 --- a/plugin-server/src/worker/vm/vm.ts +++ b/plugin-server/src/worker/vm/vm.ts @@ -11,7 +11,7 @@ import { createJobs } from './extensions/jobs' import { createPosthog } from './extensions/posthog' import { createStorage } from './extensions/storage' import { createUtils } from './extensions/utilities' -import { imports } from './imports' +import { determineImports } from './imports' import { transformCode } from './transforms' import { upgradeExportEvents } from './upgrades/export-events' import { addHistoricalEventsExportCapability } from './upgrades/historical-export/export-historical-events' @@ -34,6 +34,8 @@ export function createPluginConfigVM( pluginConfig: PluginConfig, // NB! might have team_id = 0 indexJs: string ): PluginConfigVMResponse { + const imports = determineImports(hub, pluginConfig.team_id) + const timer = new Date() const statsdTiming = (metric: string) => { diff --git a/plugin-server/tests/main/ingestion-queues/each-batch.test.ts b/plugin-server/tests/main/ingestion-queues/each-batch.test.ts index 55dcb1e2f3d23..f0b4273bdb216 100644 --- a/plugin-server/tests/main/ingestion-queues/each-batch.test.ts +++ b/plugin-server/tests/main/ingestion-queues/each-batch.test.ts @@ -202,7 +202,8 @@ describe('eachBatchX', () => { const hookCannon = new HookCommander( queue.pluginsServer.postgres, queue.pluginsServer.teamManager, - queue.pluginsServer.organizationManager + queue.pluginsServer.organizationManager, + new Set() ) const matchSpy = jest.spyOn(actionMatcher, 'match') // mock hasWebhooks to return true diff --git a/plugin-server/tests/utils/fetch.test.ts b/plugin-server/tests/utils/fetch.test.ts index 805d2dda8e778..d6100232a7192 100644 --- a/plugin-server/tests/utils/fetch.test.ts +++ b/plugin-server/tests/utils/fetch.test.ts @@ -1,6 +1,6 @@ -import { filteredFetch, raiseIfUserProvidedUrlUnsafe } from '../../src/utils/fetch' +import { FetchError } from 'node-fetch' -const { FetchError } = filteredFetch +import { raiseIfUserProvidedUrlUnsafe } from '../../src/utils/fetch' test('raiseIfUserProvidedUrlUnsafe', async () => { // Sync test cases with posthog/api/test/test_utils.py diff --git a/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts b/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts index ac456a076276a..4b1a04a0ba4d9 100644 --- a/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts +++ b/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts @@ -45,7 +45,7 @@ describe('Event Pipeline integration test', () => { actionManager = new ActionManager(hub.db.postgres) await actionManager.prepare() actionMatcher = new ActionMatcher(hub.db.postgres, actionManager) - hookCannon = new HookCommander(hub.db.postgres, hub.teamManager, hub.organizationManager) + hookCannon = new HookCommander(hub.db.postgres, hub.teamManager, hub.organizationManager, new Set()) jest.spyOn(hub.db, 'fetchPerson') jest.spyOn(hub.db, 'createPerson') diff --git a/plugin-server/tests/worker/ingestion/hooks.test.ts b/plugin-server/tests/worker/ingestion/hooks.test.ts index 83c7722587cec..19e1f0eb684e5 100644 --- a/plugin-server/tests/worker/ingestion/hooks.test.ts +++ b/plugin-server/tests/worker/ingestion/hooks.test.ts @@ -475,10 +475,9 @@ describe('hooks', () => { beforeEach(() => { jest.mocked(isCloud).mockReturnValue(false) // Disable private IP guard - hookCommander = new HookCommander({} as any, {} as any, {} as any) hook = { id: 'id', - team_id: 2, + team_id: 1, user_id: 1, resource_id: 1, event: 'foo', @@ -486,6 +485,12 @@ describe('hooks', () => { created: new Date().toISOString(), updated: new Date().toISOString(), } + hookCommander = new HookCommander( + {} as any, + {} as any, + {} as any, + new Set([hook.team_id]) // Hostname guard enabled + ) }) test('person = undefined', async () => { @@ -520,7 +525,7 @@ describe('hooks', () => { const uuid = new UUIDT().toString() await hookCommander.postRestHook(hook, { event: 'foo', - teamId: 1, + teamId: hook.team_id, person_id: uuid, person_properties: { foo: 'bar' }, person_created_at: DateTime.fromISO(now).toUTC(), @@ -535,7 +540,7 @@ describe('hooks', () => { }, data: { event: 'foo', - teamId: 1, + teamId: hook.team_id, person: { uuid: uuid, properties: { foo: 'bar' }, From a030f1f3fa20616648029e251c3e04b2a1d8ba2f Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 25 Aug 2023 15:07:06 +0200 Subject: [PATCH 09/10] Fix fetch call assertions --- plugin-server/src/utils/fetch.ts | 4 +-- .../tests/worker/vm.extra-lazy.test.ts | 6 ++-- plugin-server/tests/worker/vm.test.ts | 30 +++++++++++-------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/plugin-server/src/utils/fetch.ts b/plugin-server/src/utils/fetch.ts index 309a4c04d5cda..298e9e70debad 100644 --- a/plugin-server/src/utils/fetch.ts +++ b/plugin-server/src/utils/fetch.ts @@ -8,7 +8,7 @@ import { URL } from 'url' import { runInSpan } from '../sentry' -export async function trackedFetch(url: RequestInfo, init: RequestInit | undefined): Promise { +export async function trackedFetch(url: RequestInfo, init?: RequestInit): Promise { const request = new Request(url, init) return await runInSpan( { @@ -22,7 +22,7 @@ export async function trackedFetch(url: RequestInfo, init: RequestInit | undefin trackedFetch.isRedirect = fetch.isRedirect trackedFetch.FetchError = FetchError -export async function safeTrackedFetch(url: RequestInfo, init: RequestInit | undefined): Promise { +export async function safeTrackedFetch(url: RequestInfo, init?: RequestInit): Promise { const request = new Request(url, init) return await runInSpan( { diff --git a/plugin-server/tests/worker/vm.extra-lazy.test.ts b/plugin-server/tests/worker/vm.extra-lazy.test.ts index 3c66d319da44d..e571b2f809b59 100644 --- a/plugin-server/tests/worker/vm.extra-lazy.test.ts +++ b/plugin-server/tests/worker/vm.extra-lazy.test.ts @@ -39,7 +39,7 @@ describe('VMs are extra lazy 💤', () => { expect(lazyVm.ready).toEqual(true) expect(lazyVm.setupPluginIfNeeded).not.toHaveBeenCalled() - expect(fetch).toHaveBeenCalledWith('https://onevent.com/') + expect(fetch).toHaveBeenCalledWith('https://onevent.com/', undefined) }) test('VM with jobs gets setup immediately', async () => { @@ -64,7 +64,7 @@ describe('VMs are extra lazy 💤', () => { expect(lazyVm.ready).toEqual(true) expect(lazyVm.setupPluginIfNeeded).not.toHaveBeenCalled() - expect(fetch).toHaveBeenCalledWith('https://onevent.com/') + expect(fetch).toHaveBeenCalledWith('https://onevent.com/', undefined) }) test('VM without tasks delays setup until necessary', async () => { @@ -91,7 +91,7 @@ describe('VMs are extra lazy 💤', () => { await lazyVm.getOnEvent() expect(lazyVm.ready).toEqual(true) expect(lazyVm.setupPluginIfNeeded).toHaveBeenCalled() - expect(fetch).toHaveBeenCalledWith('https://onevent.com/') + expect(fetch).toHaveBeenCalledWith('https://onevent.com/', undefined) }) test('getting methods and tasks returns null if plugin is in errored state', async () => { diff --git a/plugin-server/tests/worker/vm.test.ts b/plugin-server/tests/worker/vm.test.ts index 5351d9eb6b55a..138c813d5c70b 100644 --- a/plugin-server/tests/worker/vm.test.ts +++ b/plugin-server/tests/worker/vm.test.ts @@ -122,7 +122,7 @@ describe('vm tests', () => { }) expect(fetch).not.toHaveBeenCalled() await vm.methods.teardownPlugin!() - expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=hoho') + expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=hoho', undefined) }) test('processEvent', async () => { @@ -376,7 +376,7 @@ describe('vm tests', () => { event: 'export', } await vm.methods.onEvent!(event) - expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=export') + expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=export', undefined) }) test('export default', async () => { @@ -395,7 +395,7 @@ describe('vm tests', () => { event: 'default export', } await vm.methods.onEvent!(event) - expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=default export') + expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=default export', undefined) }) }) @@ -723,7 +723,7 @@ describe('vm tests', () => { } await vm.methods.processEvent!(event) - expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=fetched') + expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=fetched', undefined) expect(event.properties).toEqual({ count: 2, query: 'bla', results: [true, true] }) }) @@ -745,7 +745,7 @@ describe('vm tests', () => { } await vm.methods.processEvent!(event) - expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=fetched') + expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=fetched', undefined) expect(event.properties).toEqual({ count: 2, query: 'bla', results: [true, true] }) }) @@ -766,7 +766,7 @@ describe('vm tests', () => { } await vm.methods.processEvent!(event) - expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=fetched') + expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=fetched', undefined) expect(event.properties).toEqual({ count: 2, query: 'bla', results: [true, true] }) }) @@ -1051,7 +1051,7 @@ describe('vm tests', () => { event: 'onEvent', } await vm.methods.onEvent!(event) - expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=onEvent') + expect(fetch).toHaveBeenCalledWith('https://google.com/results.json?query=onEvent', undefined) }) describe('exportEvents', () => { @@ -1085,7 +1085,7 @@ describe('vm tests', () => { await vm.methods.onEvent!({ ...defaultEvent, event: 'otherEvent2' }) await vm.methods.onEvent!({ ...defaultEvent, event: 'otherEvent3' }) await delay(1010) - expect(fetch).toHaveBeenCalledWith('https://export.com/results.json?query=otherEvent2&events=2') + expect(fetch).toHaveBeenCalledWith('https://export.com/results.json?query=otherEvent2&events=2', undefined) expect(hub.appMetrics.queueMetric).toHaveBeenCalledWith({ teamId: pluginConfig39.team_id, pluginConfigId: pluginConfig39.id, @@ -1136,8 +1136,8 @@ describe('vm tests', () => { await vm.methods.onEvent!(event) await delay(1010) expect(fetch).toHaveBeenCalledTimes(4) - expect(fetch).toHaveBeenCalledWith('https://onevent.com/') - expect(fetch).toHaveBeenCalledWith('https://export.com/results.json?query=exported&events=2') + expect(fetch).toHaveBeenCalledWith('https://onevent.com/', undefined) + expect(fetch).toHaveBeenCalledWith('https://export.com/results.json?query=exported&events=2', undefined) }) test('buffers bytes with exportEventsBufferBytes', async () => { @@ -1264,10 +1264,16 @@ describe('vm tests', () => { indexJs ) await vm.methods.onEvent!(defaultEvent) - expect(fetch).not.toHaveBeenCalledWith('https://export.com/results.json?query=default event&events=1') + expect(fetch).not.toHaveBeenCalledWith( + 'https://export.com/results.json?query=default event&events=1', + undefined + ) await vm.methods.teardownPlugin!() - expect(fetch).toHaveBeenCalledWith('https://export.com/results.json?query=default event&events=1') + expect(fetch).toHaveBeenCalledWith( + 'https://export.com/results.json?query=default event&events=1', + undefined + ) }) }) From 99623565feb220380a44539e0dea70fb8c5ea267 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Mon, 18 Sep 2023 11:59:48 +0200 Subject: [PATCH 10/10] Make `fetchHostnameGuardTeams` optional --- plugin-server/src/worker/ingestion/hooks.ts | 4 ++-- plugin-server/tests/main/ingestion-queues/each-batch.test.ts | 3 +-- .../event-pipeline/event-pipeline-integration.test.ts | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/plugin-server/src/worker/ingestion/hooks.ts b/plugin-server/src/worker/ingestion/hooks.ts index 78ac02daa7fcc..2cc8279c88d52 100644 --- a/plugin-server/src/worker/ingestion/hooks.ts +++ b/plugin-server/src/worker/ingestion/hooks.ts @@ -266,13 +266,13 @@ export class HookCommander { postgres: PostgresRouter, teamManager: TeamManager, organizationManager: OrganizationManager, - fetchHostnameGuardTeams: Set, + fetchHostnameGuardTeams?: Set, statsd?: StatsD ) { this.postgres = postgres this.teamManager = teamManager this.organizationManager = organizationManager - this.fetchHostnameGuardTeams = fetchHostnameGuardTeams + this.fetchHostnameGuardTeams = fetchHostnameGuardTeams || new Set() if (process.env.SITE_URL) { this.siteUrl = process.env.SITE_URL } else { diff --git a/plugin-server/tests/main/ingestion-queues/each-batch.test.ts b/plugin-server/tests/main/ingestion-queues/each-batch.test.ts index f0b4273bdb216..55dcb1e2f3d23 100644 --- a/plugin-server/tests/main/ingestion-queues/each-batch.test.ts +++ b/plugin-server/tests/main/ingestion-queues/each-batch.test.ts @@ -202,8 +202,7 @@ describe('eachBatchX', () => { const hookCannon = new HookCommander( queue.pluginsServer.postgres, queue.pluginsServer.teamManager, - queue.pluginsServer.organizationManager, - new Set() + queue.pluginsServer.organizationManager ) const matchSpy = jest.spyOn(actionMatcher, 'match') // mock hasWebhooks to return true diff --git a/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts b/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts index 4b1a04a0ba4d9..ac456a076276a 100644 --- a/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts +++ b/plugin-server/tests/worker/ingestion/event-pipeline/event-pipeline-integration.test.ts @@ -45,7 +45,7 @@ describe('Event Pipeline integration test', () => { actionManager = new ActionManager(hub.db.postgres) await actionManager.prepare() actionMatcher = new ActionMatcher(hub.db.postgres, actionManager) - hookCannon = new HookCommander(hub.db.postgres, hub.teamManager, hub.organizationManager, new Set()) + hookCannon = new HookCommander(hub.db.postgres, hub.teamManager, hub.organizationManager) jest.spyOn(hub.db, 'fetchPerson') jest.spyOn(hub.db, 'createPerson')