diff --git a/docker/livestream/configs-dev.yml b/docker/livestream/configs-dev.yml new file mode 100644 index 00000000000000..5f7daf3ac52870 --- /dev/null +++ b/docker/livestream/configs-dev.yml @@ -0,0 +1,14 @@ +prod: false +tailscale: + controlUrl: + hostname: 'live-events-dev' +kafka: + brokers: 'kafka:9092' + topic: 'events_plugin_ingestion' + group_id: 'livestream-dev' +mmdb: + path: 'GeoLite2-City.mmdb' +jwt: + token: '' +postgres: + url: 'postgres://posthog:posthog@db:5432/posthog' diff --git a/ee/api/test/test_billing.py b/ee/api/test/test_billing.py index d4586149f16c36..2b4d38dd85bd8a 100644 --- a/ee/api/test/test_billing.py +++ b/ee/api/test/test_billing.py @@ -375,19 +375,11 @@ def mock_implementation(url: str, headers: Any = None, params: Any = None) -> Ma "unit_amount_usd": "0.00", "up_to": 1000000, "current_amount_usd": "0.00", - "current_usage": 0, - "flat_amount_usd": "0", - "projected_amount_usd": "None", - "projected_usage": None, }, { "unit_amount_usd": "0.00045", "up_to": 2000000, "current_amount_usd": "0.00", - "current_usage": 0, - "flat_amount_usd": "0", - "projected_amount_usd": "None", - "projected_usage": None, }, ], "tiered": True, @@ -418,19 +410,11 @@ def mock_implementation(url: str, headers: Any = None, params: Any = None) -> Ma "tiers": [ { "current_amount_usd": "0.00", - "current_usage": 0, - "flat_amount_usd": "0", - "projected_amount_usd": "None", - "projected_usage": None, "unit_amount_usd": "0.00", "up_to": 1000000, }, { "current_amount_usd": "0.00", - "current_usage": 0, - "flat_amount_usd": "0", - "projected_amount_usd": "None", - "projected_usage": None, "unit_amount_usd": "0.0000135", "up_to": 2000000, }, @@ -512,7 +496,7 @@ def mock_implementation(url: str, headers: Any = None, params: Any = None) -> Ma ], "current_usage": 0, "percentage_usage": 0, - "current_amount_usd": "0.00", + "current_amount_usd": 0.0, "has_exceeded_limit": False, "projected_amount_usd": 0.0, "projected_amount": 0, @@ -525,7 +509,7 @@ def mock_implementation(url: str, headers: Any = None, params: Any = None) -> Ma "usage_key": "events", "addons": [ { - "current_amount_usd": "0.00", + "current_amount_usd": 0.0, "current_usage": 0, "description": "Test Addon", "free_allocation": 10000, @@ -758,217 +742,15 @@ def mock_implementation(url: str, headers: Any = None, params: Any = None) -> Ma res_json = res.json() # Should update product usage to reflect today's usage assert res_json["products"][0]["current_usage"] == 1101000 - assert res_json["products"][0]["current_amount_usd"] == "45.45" - assert res_json["products"][0]["tiers"][0]["current_usage"] == 1000000 - assert res_json["products"][0]["tiers"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["tiers"][1]["current_usage"] == 101000 - assert res_json["products"][0]["tiers"][1]["current_amount_usd"] == "45.45" - - assert res_json["products"][0]["addons"][0]["current_usage"] == 1101000 - assert res_json["products"][0]["addons"][0]["current_amount_usd"] == "1.36" - assert res_json["products"][0]["addons"][0]["tiers"][0]["current_usage"] == 1000000 - assert res_json["products"][0]["addons"][0]["tiers"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["addons"][0]["tiers"][1]["current_usage"] == 101000 - assert res_json["products"][0]["addons"][0]["tiers"][1]["current_amount_usd"] == "1.36" - - # Now test when there is a usage_limit. 
- def mock_implementation_with_limit(url: str, headers: Any = None, params: Any = None) -> MagicMock: - mock = MagicMock() - mock.status_code = 404 - - if "api/billing/portal" in url: - mock.status_code = 200 - mock.json.return_value = {"url": "https://billing.stripe.com/p/session/test_1234"} - elif "api/billing" in url: - mock.status_code = 200 - mock.json.return_value = create_billing_response( - customer=create_billing_customer(has_active_subscription=True), - ) - mock.json.return_value["customer"]["usage_summary"]["events"]["usage"] = 1000000 - mock.json.return_value["customer"]["usage_summary"]["events"]["limit"] = 1000000 - elif "api/products" in url: - mock.status_code = 200 - mock.json.return_value = create_billing_products_response() - - return mock - - mock_request.side_effect = mock_implementation_with_limit - self.organization.usage = {"events": {"limit": 1000000, "usage": 1000000, "todays_usage": 100}} - self.organization.save() - - res = self.client.get("/api/billing") - assert res.status_code == 200 - res_json = res.json() - # Should update product usage to reflect today's usage - assert res_json["products"][0]["current_usage"] == 1000100 - assert res_json["products"][0]["current_amount_usd"] == "0.04" - assert res_json["products"][0]["tiers"][0]["current_usage"] == 1000000 - assert res_json["products"][0]["tiers"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["tiers"][1]["current_usage"] == 100 - assert res_json["products"][0]["tiers"][1]["current_amount_usd"] == "0.04" - - assert res_json["products"][0]["addons"][0]["current_usage"] == 1000100 - assert res_json["products"][0]["addons"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["addons"][0]["tiers"][0]["current_usage"] == 1000000 - assert res_json["products"][0]["addons"][0]["tiers"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["addons"][0]["tiers"][1]["current_usage"] == 100 - assert res_json["products"][0]["addons"][0]["tiers"][1]["current_amount_usd"] == "0.00" - - def mock_implementation_exceeds_limit(url: str, headers: Any = None, params: Any = None) -> MagicMock: - mock = MagicMock() - mock.status_code = 404 - - if "api/billing/portal" in url: - mock.status_code = 200 - mock.json.return_value = {"url": "https://billing.stripe.com/p/session/test_1234"} - elif "api/billing" in url: - mock.status_code = 200 - mock.json.return_value = create_billing_response( - customer=create_billing_customer(has_active_subscription=True), - ) - mock.json.return_value["customer"]["usage_summary"]["events"]["usage"] = 1100000 - mock.json.return_value["customer"]["usage_summary"]["events"]["limit"] = 1000000 - elif "api/products" in url: - mock.status_code = 200 - mock.json.return_value = create_billing_products_response() - - return mock - - mock_request.side_effect = mock_implementation_exceeds_limit - self.organization.usage = {"events": {"limit": 1000000, "usage": 1100000, "todays_usage": 1000}} - self.organization.save() - - res = self.client.get("/api/billing") - assert res.status_code == 200 - res_json = res.json() - # Should update product usage to reflect today's usage - assert res_json["products"][0]["current_usage"] == 1101000 - assert res_json["products"][0]["current_amount_usd"] == "45.00" - assert res_json["products"][0]["tiers"][0]["current_usage"] == 1000000 - assert res_json["products"][0]["tiers"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["tiers"][1]["current_usage"] == 100000 - assert 
res_json["products"][0]["tiers"][1]["current_amount_usd"] == "45.00" - - assert res_json["products"][0]["addons"][0]["current_usage"] == 1101000 - assert res_json["products"][0]["addons"][0]["current_amount_usd"] == "1.35" - assert res_json["products"][0]["addons"][0]["tiers"][0]["current_usage"] == 1000000 - assert res_json["products"][0]["addons"][0]["tiers"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["addons"][0]["tiers"][1]["current_usage"] == 100000 - assert res_json["products"][0]["addons"][0]["tiers"][1]["current_amount_usd"] == "1.35" - - # Test when the customer has no usage. Ensure that the tiered current_usage isn't set to the usage limit. - def mock_implementation_with_limit_no_usage(url: str, headers: Any = None, params: Any = None) -> MagicMock: - mock = MagicMock() - mock.status_code = 404 - - if "api/billing/portal" in url: - mock.status_code = 200 - mock.json.return_value = {"url": "https://billing.stripe.com/p/session/test_1234"} - elif "api/billing" in url: - mock.status_code = 200 - mock.json.return_value = create_billing_response( - customer=create_billing_customer(has_active_subscription=True), - ) - mock.json.return_value["customer"]["usage_summary"]["events"]["usage"] = 0 - mock.json.return_value["customer"]["usage_summary"]["events"]["limit"] = 1000000 - elif "api/products" in url: - mock.status_code = 200 - mock.json.return_value = create_billing_products_response() - - return mock - - mock_request.side_effect = mock_implementation_with_limit_no_usage - - self.organization.usage = {"events": {"limit": 1000000, "usage": 0, "todays_usage": 0}} - self.organization.save() - - res = self.client.get("/api/billing") - assert res.status_code == 200 - res_json = res.json() - # Should update product usage to reflect today's usage - assert res_json["products"][0]["current_usage"] == 0 assert res_json["products"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["tiers"][0]["current_usage"] == 0 assert res_json["products"][0]["tiers"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["tiers"][1]["current_usage"] == 0 assert res_json["products"][0]["tiers"][1]["current_amount_usd"] == "0.00" assert res_json["products"][0]["addons"][0]["current_usage"] == 0 assert res_json["products"][0]["addons"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["addons"][0]["tiers"][0]["current_usage"] == 0 assert res_json["products"][0]["addons"][0]["tiers"][0]["current_amount_usd"] == "0.00" - assert res_json["products"][0]["addons"][0]["tiers"][1]["current_usage"] == 0 assert res_json["products"][0]["addons"][0]["tiers"][1]["current_amount_usd"] == "0.00" - def mock_implementation_missing_customer(url: str, headers: Any = None, params: Any = None) -> MagicMock: - mock = MagicMock() - mock.status_code = 404 - - if "api/billing/portal" in url: - mock.status_code = 200 - mock.json.return_value = {"url": "https://billing.stripe.com/p/session/test_1234"} - elif "api/billing" in url: - mock.status_code = 200 - mock.json.return_value = create_billing_response(customer=create_missing_billing_customer()) - elif "api/products" in url: - mock.status_code = 200 - mock.json.return_value = create_billing_products_response() - - return mock - - mock_request.side_effect = mock_implementation_missing_customer - - # Test unsubscribed config - res = self.client.get("/api/billing") - self.organization.refresh_from_db() - assert self.organization.usage == { - "events": { - "limit": None, - "todays_usage": 0, - "usage": 0, - }, - 
"recordings": { - "limit": None, - "todays_usage": 0, - "usage": 0, - }, - "rows_synced": { - "limit": None, - "todays_usage": 0, - "usage": 0, - }, - "period": ["2022-10-07T11:12:48", "2022-11-07T11:12:48"], - } - assert self.organization.customer_id == "cus_123" - - # Now test when there is a tiered product in the response that isn't in the usage dict - def mock_implementation(url: str, headers: Any = None, params: Any = None) -> MagicMock: - mock = MagicMock() - mock.status_code = 404 - - if "api/billing/portal" in url: - mock.status_code = 200 - mock.json.return_value = {"url": "https://billing.stripe.com/p/session/test_1234"} - elif "api/billing" in url: - mock.status_code = 200 - mock.json.return_value = create_billing_response( - customer=create_billing_customer(has_active_subscription=True), - ) - mock.json.return_value["customer"]["products"][0]["usage_key"] = "feature_flag_requests" - mock.json.return_value["customer"]["usage_summary"]["events"]["usage"] = 1000 - elif "api/products" in url: - mock.status_code = 200 - mock.json.return_value = create_billing_products_response() - - return mock - - mock_request.side_effect = mock_implementation - self.organization.usage = {"events": {"limit": 1000000, "usage": 1000, "todays_usage": 1100000}} - self.organization.save() - - res = self.client.get("/api/billing") - assert res.status_code == 200 - @patch("ee.api.billing.requests.get") def test_organization_usage_count_with_demo_project(self, mock_request, *args): def mock_implementation(url: str, headers: Any = None, params: Any = None) -> MagicMock: diff --git a/ee/billing/billing_manager.py b/ee/billing/billing_manager.py index 2f35a7be3b92e5..4f735d4890b3d0 100644 --- a/ee/billing/billing_manager.py +++ b/ee/billing/billing_manager.py @@ -1,7 +1,6 @@ from datetime import datetime, timedelta -from decimal import Decimal from enum import Enum -from typing import Any, Optional, Union, cast +from typing import Any, Optional, cast import jwt import requests @@ -11,7 +10,7 @@ from rest_framework.exceptions import NotAuthenticated from sentry_sdk import capture_exception -from ee.billing.billing_types import BillingStatus, Tier +from ee.billing.billing_types import BillingStatus from ee.billing.quota_limiting import set_org_usage_summary, sync_org_quota_limits from ee.models import License from ee.settings import BILLING_SERVICE_URL @@ -58,61 +57,6 @@ def handle_billing_service_error(res: requests.Response, valid_codes=(200, 404, raise Exception(f"Billing service returned bad status code: {res.status_code}", f"body:", res.text) -def compute_usage_per_tier(limited_usage: int, projected_usage: int, tiers): - remaining_usage = limited_usage - remaining_projected_usage = projected_usage or 0 - previous_tier: Optional[dict[str, Any]] = None - tier_max_usage: Union[int, float] = 0 - - result: list[Tier] = [] - for tier in tiers: - if previous_tier and previous_tier.get("up_to"): - previous_tier_up_to = previous_tier["up_to"] - else: - previous_tier_up_to = 0 - - if tier.get("up_to"): - tier_max_usage = tier["up_to"] - previous_tier_up_to - else: - tier_max_usage = float("inf") - - flat_amount_usd = Decimal(tier.get("flat_amount_usd") or 0) - unit_amount_usd = Decimal(tier.get("unit_amount_usd") or 0) - usage_this_tier = int(min(remaining_usage, tier_max_usage)) - remaining_usage -= usage_this_tier - current_amount_usd = Decimal(unit_amount_usd * usage_this_tier + flat_amount_usd).quantize(Decimal("0.01")) - previous_tier = tier - if projected_usage: - projected_usage_this_tier = 
int(min(remaining_projected_usage, tier_max_usage)) - remaining_projected_usage -= projected_usage_this_tier - projected_amount_usd = Decimal(unit_amount_usd * projected_usage_this_tier + flat_amount_usd).quantize( - Decimal("0.01") - ) - else: - projected_usage_this_tier = None - projected_amount_usd = None - - result.append( - Tier( - flat_amount_usd=str(flat_amount_usd), - unit_amount_usd=str(unit_amount_usd), - up_to=tier.get("up_to", None), - current_amount_usd=str(current_amount_usd), - current_usage=usage_this_tier, - projected_usage=projected_usage_this_tier, - projected_amount_usd=str(projected_amount_usd), - ) - ) - return result - - -def sum_total_across_tiers(tiers): - total = Decimal(0) - for tier in tiers: - total += Decimal(tier["current_amount_usd"]) - return total - - class BillingManager: license: Optional[License] @@ -164,50 +108,6 @@ def get_billing(self, organization: Optional[Organization], plan_keys: Optional[ product["current_usage"] = current_usage product["percentage_usage"] = current_usage / usage_limit if usage_limit else 0 - - # Also update the tiers - if product.get("tiers"): - usage_limit = product_usage.get("limit") - limited_usage = 0 - # If the usage has already exceeded the billing limit, don't increment - # today's usage - if usage_limit is not None and billing_reported_usage > usage_limit: - limited_usage = billing_reported_usage - else: - limited_usage = current_usage - - product["tiers"] = compute_usage_per_tier( - limited_usage, product["projected_usage"], product["tiers"] - ) - product["current_amount_usd"] = str(sum_total_across_tiers(product["tiers"])) - - # Update the add on tiers - # TODO: enhanced_persons: make sure this updates properly for addons with different usage keys - for addon in product.get("addons"): - if not addon.get("subscribed"): - continue - addon_usage_key = addon.get("usage_key") - if not usage_key: - continue - if addon_usage_key != usage_key: - usage = response.get("usage_summary", {}).get(addon_usage_key, {}) - usage_limit = usage.get("limit") - billing_reported_usage = usage.get("usage") or 0 - if product_usage.get("todays_usage"): - todays_usage = product_usage["todays_usage"] - current_usage = billing_reported_usage + todays_usage - addon["current_usage"] = current_usage - - limited_usage = 0 - # If the usage has already exceeded the billing limit, don't increment - # today's usage - if usage_limit is not None and billing_reported_usage > usage_limit: - limited_usage = billing_reported_usage - else: - # Otherwise, do increment toady's usage - limited_usage = current_usage - addon["tiers"] = compute_usage_per_tier(limited_usage, addon["projected_usage"], addon["tiers"]) - addon["current_amount_usd"] = str(sum_total_across_tiers(addon["tiers"])) else: products = self.get_default_products(organization) response = { diff --git a/ee/clickhouse/queries/funnels/funnel_correlation.py b/ee/clickhouse/queries/funnels/funnel_correlation.py index efa84347730d75..9bf35aaa67b8c4 100644 --- a/ee/clickhouse/queries/funnels/funnel_correlation.py +++ b/ee/clickhouse/queries/funnels/funnel_correlation.py @@ -29,7 +29,7 @@ from posthog.queries.insight import insight_sync_execute from posthog.queries.person_distinct_id_query import get_team_distinct_ids_query from posthog.queries.person_query import PersonQuery -from posthog.queries.util import correct_result_for_sampling +from posthog.queries.util import alias_poe_mode_for_legacy, correct_result_for_sampling from posthog.schema import PersonsOnEventsMode from posthog.utils import 
generate_short_id @@ -152,7 +152,7 @@ def __init__( def properties_to_include(self) -> list[str]: props_to_include = [] if ( - self._team.person_on_events_mode != PersonsOnEventsMode.DISABLED + alias_poe_mode_for_legacy(self._team.person_on_events_mode) != PersonsOnEventsMode.DISABLED and self._filter.correlation_type == FunnelCorrelationType.PROPERTIES ): # When dealing with properties, make sure funnel response comes with properties @@ -499,7 +499,10 @@ def _get_events_join_query(self) -> str: def _get_aggregation_join_query(self): if self._filter.aggregation_group_type_index is None: - if self._team.person_on_events_mode != PersonsOnEventsMode.DISABLED and groups_on_events_querying_enabled(): + if ( + alias_poe_mode_for_legacy(self._team.person_on_events_mode) != PersonsOnEventsMode.DISABLED + and groups_on_events_querying_enabled() + ): return "", {} person_query, person_query_params = PersonQuery( @@ -519,7 +522,10 @@ def _get_aggregation_join_query(self): return GroupsJoinQuery(self._filter, self._team.pk, join_key="funnel_actors.actor_id").get_join_query() def _get_properties_prop_clause(self): - if self._team.person_on_events_mode != PersonsOnEventsMode.DISABLED and groups_on_events_querying_enabled(): + if ( + alias_poe_mode_for_legacy(self._team.person_on_events_mode) != PersonsOnEventsMode.DISABLED + and groups_on_events_querying_enabled() + ): group_properties_field = f"group{self._filter.aggregation_group_type_index}_properties" aggregation_properties_alias = ( "person_properties" if self._filter.aggregation_group_type_index is None else group_properties_field @@ -546,7 +552,9 @@ def _get_properties_prop_clause(self): param_name = f"property_name_{index}" if self._filter.aggregation_group_type_index is not None: expression, _ = get_property_string_expr( - "groups" if self._team.person_on_events_mode == PersonsOnEventsMode.DISABLED else "events", + "groups" + if alias_poe_mode_for_legacy(self._team.person_on_events_mode) == PersonsOnEventsMode.DISABLED + else "events", property_name, f"%({param_name})s", aggregation_properties_alias, @@ -554,13 +562,16 @@ def _get_properties_prop_clause(self): ) else: expression, _ = get_property_string_expr( - "person" if self._team.person_on_events_mode == PersonsOnEventsMode.DISABLED else "events", + "person" + if alias_poe_mode_for_legacy(self._team.person_on_events_mode) == PersonsOnEventsMode.DISABLED + else "events", property_name, f"%({param_name})s", aggregation_properties_alias, materialised_table_column=( aggregation_properties_alias - if self._team.person_on_events_mode != PersonsOnEventsMode.DISABLED + if alias_poe_mode_for_legacy(self._team.person_on_events_mode) + != PersonsOnEventsMode.DISABLED else "properties" ), ) diff --git a/ee/clickhouse/queries/groups_join_query.py b/ee/clickhouse/queries/groups_join_query.py index ddb7d193d6d9bc..70334d0f2df1da 100644 --- a/ee/clickhouse/queries/groups_join_query.py +++ b/ee/clickhouse/queries/groups_join_query.py @@ -8,7 +8,7 @@ from posthog.models.filters.utils import GroupTypeIndex from posthog.models.property.util import parse_prop_grouped_clauses from posthog.models.team.team import groups_on_events_querying_enabled -from posthog.queries.util import PersonPropertiesMode +from posthog.queries.util import PersonPropertiesMode, alias_poe_mode_for_legacy from posthog.schema import PersonsOnEventsMode @@ -33,7 +33,7 @@ def __init__( self._team_id = team_id self._column_optimizer = column_optimizer or EnterpriseColumnOptimizer(self._filter, self._team_id) self._join_key = join_key - 
self._person_on_events_mode = person_on_events_mode + self._person_on_events_mode = alias_poe_mode_for_legacy(person_on_events_mode) def get_join_query(self) -> tuple[str, dict]: join_queries, params = [], {} diff --git a/frontend/__snapshots__/filters-universalfilters--default--dark.png b/frontend/__snapshots__/filters-universalfilters--default--dark.png new file mode 100644 index 00000000000000..9d5153cb3fe5c2 Binary files /dev/null and b/frontend/__snapshots__/filters-universalfilters--default--dark.png differ diff --git a/frontend/__snapshots__/filters-universalfilters--default--light.png b/frontend/__snapshots__/filters-universalfilters--default--light.png new file mode 100644 index 00000000000000..91683cf1ceb2a0 Binary files /dev/null and b/frontend/__snapshots__/filters-universalfilters--default--light.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light.png index 87e964fc8ec9ca..47914be1ffc6ad 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light.png differ diff --git a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs--dark.png b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs--dark.png index 1b60d7a49a3cca..4f4f10f435fcaf 100644 Binary files a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs--dark.png and b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs--light.png b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs--light.png index 990fb268b7c6e1..fa333ce9cd3879 100644 Binary files a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs--light.png and b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs--light.png differ diff --git a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs-batch-export--dark.png b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs-batch-export--dark.png index e25ebfaf3dca94..d368a3388deecb 100644 Binary files a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs-batch-export--dark.png and b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs-batch-export--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs-batch-export--light.png b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs-batch-export--light.png index 7225b14c811a61..37993b37f4551c 100644 Binary files a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs-batch-export--light.png and b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-logs-batch-export--light.png differ diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index d0a94739c95f4d..73e64be958e59c 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -5,6 +5,7 @@ import { ActivityLogItem } from 'lib/components/ActivityLog/humanizeActivity' import { apiStatusLogic } from 'lib/logic/apiStatusLogic' import { objectClean, toParams } from 'lib/utils' import posthog from 'posthog-js' +import { LogEntry } from 'scenes/pipeline/pipelineNodeLogsLogic' import { SavedSessionRecordingPlaylistsResult } from 'scenes/session-recordings/saved-playlists/savedSessionRecordingPlaylistsLogic' import { getCurrentExporterData } from 
'~/exporter/exporterViewLogic' @@ -1659,6 +1660,12 @@ const api = { async update(id: HogFunctionType['id'], data: Partial): Promise { return await new ApiRequest().hogFunction(id).update({ data }) }, + async searchLogs( + id: HogFunctionType['id'], + params: Record = {} + ): Promise> { + return await new ApiRequest().hogFunction(id).withAction('logs').withQueryString(params).get() + }, }, annotations: { diff --git a/frontend/src/lib/components/TaxonomicFilter/InfiniteSelectResults.tsx b/frontend/src/lib/components/TaxonomicFilter/InfiniteSelectResults.tsx index f9579316b153f7..89bf8bbd9b2f3a 100644 --- a/frontend/src/lib/components/TaxonomicFilter/InfiniteSelectResults.tsx +++ b/frontend/src/lib/components/TaxonomicFilter/InfiniteSelectResults.tsx @@ -68,7 +68,7 @@ export function InfiniteSelectResults({ selectItem(activeTaxonomicGroup, newValue, item)} + onChange={(newValue) => selectItem(activeTaxonomicGroup, newValue, newValue)} /> ) : ( diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx index 07e8c6d51b86f0..12cb6ea2814ec2 100644 --- a/frontend/src/lib/constants.tsx +++ b/frontend/src/lib/constants.tsx @@ -205,6 +205,7 @@ export const FEATURE_FLAGS = { LIVE_EVENTS: 'live-events', // owner: @zach or @jams SESSION_REPLAY_NETWORK_VIEW: 'session-replay-network-view', // owner: #team-replay SETTINGS_PERSONS_JOIN_MODE: 'settings-persons-join-mode', // owner: @robbie-c + SETTINGS_PERSONS_ON_EVENTS_HIDDEN: 'settings-persons-on-events-hidden', // owner: @Twixes HOG: 'hog', // owner: @mariusandra HOG_FUNCTIONS: 'hog-functions', // owner: #team-cdp PERSONLESS_EVENTS_NOT_SUPPORTED: 'personless-events-not-supported', // owner: @raquelmsmith diff --git a/frontend/src/scenes/billing/Billing.tsx b/frontend/src/scenes/billing/Billing.tsx index 8bd8d5305432ab..c6cb267ffd2552 100644 --- a/frontend/src/scenes/billing/Billing.tsx +++ b/frontend/src/scenes/billing/Billing.tsx @@ -145,7 +145,7 @@ export function Billing(): JSX.Element { {billing?.has_active_subscription && ( <> Current bill total diff --git a/frontend/src/scenes/billing/BillingProduct.tsx b/frontend/src/scenes/billing/BillingProduct.tsx index be58d1a8eb1688..215ddcd1efe844 100644 --- a/frontend/src/scenes/billing/BillingProduct.tsx +++ b/frontend/src/scenes/billing/BillingProduct.tsx @@ -208,7 +208,7 @@ export const BillingProduct = ({ product }: { product: BillingProductV2Type }): billing?.discount_percent ? 'discounted ' : '' }amount you have been billed for this ${ billing?.billing_period?.interval - } so far.`} + } so far. This number updates once daily.`} >
@@ -235,7 +235,7 @@ export const BillingProduct = ({ product }: { product: BillingProductV2Type }):
                                         billing?.discount_percent ? ', discounts on your account,' : ''
-                                    } and the remaining time left in this billing period.`}
+                                    } and the remaining time left in this billing period. This number updates once daily.`}
                                 >
diff --git a/frontend/src/scenes/billing/BillingProductPricingTable.tsx b/frontend/src/scenes/billing/BillingProductPricingTable.tsx
index 344e6077598c72..196551d3a2ba53 100644
--- a/frontend/src/scenes/billing/BillingProductPricingTable.tsx
+++ b/frontend/src/scenes/billing/BillingProductPricingTable.tsx
@@ -1,5 +1,5 @@
 import { IconArrowRightDown, IconInfo } from '@posthog/icons'
-import { LemonTable, LemonTableColumns, Link } from '@posthog/lemon-ui'
+import { LemonBanner, LemonTable, LemonTableColumns, Link } from '@posthog/lemon-ui'
 import { useValues } from 'kea'
 import { compactNumber } from 'lib/utils'
@@ -192,6 +192,9 @@ export const BillingProductPricingTable = ({
                                .

)} + + Tier breakdowns are updated once daily and may differ from the gauge above. + ) : ( a.occurrences - b.occurrences, + }, + { + title: 'Sessions', + dataIndex: 'uniqueSessions', + sorter: (a, b) => a.uniqueSessions - b.uniqueSessions, + }, + ]} + loading={errorGroupsLoading} + dataSource={errorGroups} + expandable={{ + expandedRowRender: function renderExpand(group: ErrorTrackingGroup) { + return + }, + noIndent: true, + }} + /> + ) +} diff --git a/frontend/src/scenes/error-tracking/errorTrackingSceneLogic.ts b/frontend/src/scenes/error-tracking/errorTrackingSceneLogic.ts new file mode 100644 index 00000000000000..8cca4639c94d03 --- /dev/null +++ b/frontend/src/scenes/error-tracking/errorTrackingSceneLogic.ts @@ -0,0 +1,47 @@ +import { afterMount, kea, path } from 'kea' +import { loaders } from 'kea-loaders' +import api from 'lib/api' + +import { HogQLQuery, NodeKind } from '~/queries/schema' +import { hogql } from '~/queries/utils' +import { ErrorTrackingGroup } from '~/types' + +import type { errorTrackingSceneLogicType } from './errorTrackingSceneLogicType' + +export const errorTrackingSceneLogic = kea([ + path(['scenes', 'error-tracking', 'errorTrackingSceneLogic']), + + loaders(() => ({ + errorGroups: [ + [] as ErrorTrackingGroup[], + { + loadErrorGroups: async () => { + const query: HogQLQuery = { + kind: NodeKind.HogQLQuery, + query: hogql`SELECT first_value(properties), count(), count(distinct properties.$session_id) + FROM events e + WHERE event = '$exception' + -- grouping by message for now, will eventually be predefined $exception_group_id + GROUP BY properties.$exception_message`, + } + + const res = await api.query(query) + + return res.results.map((r) => { + const eventProperties = JSON.parse(r[0]) + return { + title: eventProperties['$exception_message'] || 'No message', + sampleEventProperties: eventProperties, + occurrences: r[2], + uniqueSessions: r[3], + } + }) + }, + }, + ], + })), + + afterMount(({ actions }) => { + actions.loadErrorGroups() + }), +]) diff --git a/frontend/src/scenes/pipeline/PipelineNodeLogs.tsx b/frontend/src/scenes/pipeline/PipelineNodeLogs.tsx index cdb72aecf8d293..d72f01f5a58529 100644 --- a/frontend/src/scenes/pipeline/PipelineNodeLogs.tsx +++ b/frontend/src/scenes/pipeline/PipelineNodeLogs.tsx @@ -1,4 +1,5 @@ -import { LemonButton, LemonCheckbox, LemonInput, LemonTable } from '@posthog/lemon-ui' +import { IconSearch } from '@posthog/icons' +import { LemonButton, LemonCheckbox, LemonInput, LemonSnack, LemonTable } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' import { LOGS_PORTION_LIMIT } from 'lib/constants' import { pluralize } from 'lib/utils' @@ -9,8 +10,9 @@ import { PipelineLogLevel, pipelineNodeLogsLogic } from './pipelineNodeLogsLogic export function PipelineNodeLogs({ id, stage }: PipelineNodeLogicProps): JSX.Element { const logic = pipelineNodeLogsLogic({ id, stage }) - const { logs, logsLoading, backgroundLogs, columns, isThereMoreToLoad, selectedLogLevels } = useValues(logic) - const { revealBackground, loadMoreLogs, setSelectedLogLevels, setSearchTerm } = useActions(logic) + const { logs, logsLoading, backgroundLogs, columns, isThereMoreToLoad, selectedLogLevels, instanceId } = + useValues(logic) + const { revealBackground, loadMoreLogs, setSelectedLogLevels, setSearchTerm, setInstanceId } = useActions(logic) return (
@@ -20,6 +22,13 @@ export function PipelineNodeLogs({ id, stage }: PipelineNodeLogicProps): JSX.Ele
                 fullWidth
                 onChange={setSearchTerm}
                 allowClear
+                prefix={
+                    <>
+                        <IconSearch />
+                        {instanceId && <LemonSnack onClose={() => setInstanceId('')}>{instanceId}</LemonSnack>}
+                    </>
+                }
             />
Show logs of level: diff --git a/frontend/src/scenes/pipeline/hogfunctions/HogFunctionInputs.tsx b/frontend/src/scenes/pipeline/hogfunctions/HogFunctionInputs.tsx new file mode 100644 index 00000000000000..2f3cc63865729f --- /dev/null +++ b/frontend/src/scenes/pipeline/hogfunctions/HogFunctionInputs.tsx @@ -0,0 +1,287 @@ +import { Monaco } from '@monaco-editor/react' +import { IconPencil, IconPlus, IconX } from '@posthog/icons' +import { LemonButton, LemonCheckbox, LemonInput, LemonSelect } from '@posthog/lemon-ui' +import { useValues } from 'kea' +import { CodeEditor } from 'lib/components/CodeEditors' +import { languages } from 'monaco-editor' +import { useEffect, useMemo, useState } from 'react' + +import { groupsModel } from '~/models/groupsModel' +import { HogFunctionInputSchemaType } from '~/types' + +export type HogFunctionInputProps = { + schema: HogFunctionInputSchemaType + value?: any + onChange?: (value: any) => void + disabled?: boolean +} + +const SECRET_FIELD_VALUE = '********' + +function useAutocompleteOptions(): languages.CompletionItem[] { + const { groupTypes } = useValues(groupsModel) + + return useMemo(() => { + const options = [ + ['event', 'The entire event payload as a JSON object'], + ['event.name', 'The name of the event e.g. $pageview'], + ['event.distinct_id', 'The distinct_id of the event'], + ['event.timestamp', 'The timestamp of the event'], + ['event.url', 'URL to the event in PostHog'], + ['event.properties', 'Properties of the event'], + ['event.properties.', 'The individual property of the event'], + ['person', 'The entire person payload as a JSON object'], + ['project.uuid', 'The UUID of the Person in PostHog'], + ['person.url', 'URL to the person in PostHog'], + ['person.properties', 'Properties of the person'], + ['person.properties.', 'The individual property of the person'], + ['project.id', 'ID of the project in PostHog'], + ['project.name', 'Name of the project'], + ['project.url', 'URL to the project in PostHog'], + ['source.name', 'Name of the source of this message'], + ['source.url', 'URL to the source of this message in PostHog'], + ] + + groupTypes.forEach((groupType) => { + options.push([`groups.${groupType.group_type}`, `The entire group payload as a JSON object`]) + options.push([`groups.${groupType.group_type}.id`, `The ID or 'key' of the group`]) + options.push([`groups.${groupType.group_type}.url`, `URL to the group in PostHog`]) + options.push([`groups.${groupType.group_type}.properties`, `Properties of the group`]) + options.push([`groups.${groupType.group_type}.properties.`, `The individual property of the group`]) + options.push([`groups.${groupType.group_type}.index`, `Index of the group`]) + }) + + const items: languages.CompletionItem[] = options.map(([key, value]) => { + return { + label: key, + kind: languages.CompletionItemKind.Variable, + detail: value, + insertText: key, + range: { + startLineNumber: 1, + endLineNumber: 1, + startColumn: 0, + endColumn: 0, + }, + } + }) + + return items + }, [groupTypes]) +} + +function JsonConfigField(props: { + onChange?: (value: string) => void + className: string + autoFocus: boolean + value?: string | object +}): JSX.Element { + const suggestions = useAutocompleteOptions() + const [monaco, setMonaco] = useState() + + useEffect(() => { + if (!monaco) { + return + } + monaco.languages.setLanguageConfiguration('json', { + wordPattern: /[a-zA-Z0-9_\-.]+/, + }) + + const provider = monaco.languages.registerCompletionItemProvider('json', { + triggerCharacters: ['{', '{{'], + 
provideCompletionItems: async (model, position) => { + const word = model.getWordUntilPosition(position) + + const wordWithTrigger = model.getValueInRange({ + startLineNumber: position.lineNumber, + startColumn: 0, + endLineNumber: position.lineNumber, + endColumn: position.column, + }) + + if (wordWithTrigger.indexOf('{') === -1) { + return { suggestions: [] } + } + + const localSuggestions = suggestions.map((x) => ({ + ...x, + insertText: x.insertText, + range: { + startLineNumber: position.lineNumber, + endLineNumber: position.lineNumber, + startColumn: word.startColumn, + endColumn: word.endColumn, + }, + })) + + return { + suggestions: localSuggestions, + incomplete: false, + } + }, + }) + + return () => provider.dispose() + }, [suggestions, monaco]) + + return ( + props.onChange?.(v ?? '')} + options={{ + lineNumbers: 'off', + minimap: { + enabled: false, + }, + quickSuggestions: { + other: true, + strings: true, + }, + suggest: { + showWords: false, + showFields: false, + showKeywords: false, + }, + scrollbar: { + vertical: 'hidden', + verticalScrollbarSize: 0, + }, + }} + onMount={(_editor, monaco) => { + setMonaco(monaco) + }} + /> + ) +} + +function DictionaryField({ onChange, value }: { onChange?: (value: any) => void; value: any }): JSX.Element { + const [entries, setEntries] = useState<[string, string][]>(Object.entries(value ?? {})) + + useEffect(() => { + // NOTE: Filter out all empty entries as fetch will throw if passed in + const val = Object.fromEntries(entries.filter(([key, val]) => key.trim() !== '' || val.trim() !== '')) + onChange?.(val) + }, [entries]) + + return ( +
+ {entries.map(([key, val], index) => ( +
+ { + const newEntries = [...entries] + newEntries[index] = [key, newEntries[index][1]] + setEntries(newEntries) + }} + placeholder="Key" + /> + + { + const newEntries = [...entries] + newEntries[index] = [newEntries[index][0], val] + setEntries(newEntries) + }} + placeholder="Value" + /> + + } + size="small" + onClick={() => { + const newEntries = [...entries] + newEntries.splice(index, 1) + setEntries(newEntries) + }} + /> +
+ ))} + } + size="small" + type="secondary" + onClick={() => { + setEntries([...entries, ['', '']]) + }} + > + Add entry + +
+ ) +} + +export function HogFunctionInput({ value, onChange, schema, disabled }: HogFunctionInputProps): JSX.Element { + const [editingSecret, setEditingSecret] = useState(false) + if ( + schema.secret && + !editingSecret && + value && + (value === SECRET_FIELD_VALUE || value.name === SECRET_FIELD_VALUE) + ) { + return ( + } + onClick={() => { + onChange?.(schema.default || '') + setEditingSecret(true) + }} + disabled={disabled} + > + Reset secret variable + + ) + } + + switch (schema.type) { + case 'string': + return ( + + ) + case 'json': + return ( + + ) + case 'choice': + return ( + + ) + case 'dictionary': + return + + case 'boolean': + return onChange?.(checked)} disabled={disabled} /> + default: + return ( + + Unknown field type "{schema.type}". +
+ You may need to upgrade PostHog! +
+ ) + } +} diff --git a/frontend/src/scenes/pipeline/hogfunctions/HogFunctionInputsEditor.tsx b/frontend/src/scenes/pipeline/hogfunctions/HogFunctionInputsEditor.tsx new file mode 100644 index 00000000000000..59bc0fbfffaa1e --- /dev/null +++ b/frontend/src/scenes/pipeline/hogfunctions/HogFunctionInputsEditor.tsx @@ -0,0 +1,116 @@ +import { IconPlus, IconX } from '@posthog/icons' +import { LemonButton, LemonCheckbox, LemonInput, LemonInputSelect, LemonSelect } from '@posthog/lemon-ui' +import { capitalizeFirstLetter } from 'kea-forms' +import { useEffect, useState } from 'react' + +import { HogFunctionInputSchemaType } from '~/types' + +const typeList = ['string', 'boolean', 'dictionary', 'choice', 'json'] as const + +export type HogFunctionInputsEditorProps = { + value?: HogFunctionInputSchemaType[] + onChange?: (value: HogFunctionInputSchemaType[]) => void +} + +export function HogFunctionInputsEditor({ value, onChange }: HogFunctionInputsEditorProps): JSX.Element { + const [inputs, setInputs] = useState(value ?? []) + + useEffect(() => { + onChange?.(inputs) + }, [inputs]) + + return ( +
+ {inputs.map((input, index) => { + const _onChange = (data: Partial): void => { + setInputs((inputs) => { + const newInputs = [...inputs] + newInputs[index] = { ...newInputs[index], ...data } + return newInputs + }) + } + + return ( +
+
+ _onChange({ key })} + placeholder="Variable name" + /> + ({ + label: capitalizeFirstLetter(type), + value: type, + }))} + value={input.type} + className="w-30" + onChange={(type) => _onChange({ type })} + /> + + _onChange({ label })} + placeholder="Display label" + /> + _onChange({ required })} + label="Required" + bordered + /> + _onChange({ secret })} + label="Secret" + bordered + /> + {input.type === 'choice' && ( + choice.value)} + onChange={(choices) => + _onChange({ choices: choices.map((value) => ({ label: value, value })) }) + } + placeholder="Choices" + /> + )} +
+ } + size="small" + onClick={() => { + const newInputs = [...inputs] + newInputs.splice(index, 1) + setInputs(newInputs) + }} + /> +
+ ) + })} + +
+ } + size="small" + type="secondary" + onClick={() => { + setInputs([ + ...inputs, + { type: 'string', key: `input_${inputs.length + 1}`, label: '', required: false }, + ]) + }} + > + Add input variable + +
+
+ ) +} diff --git a/frontend/src/scenes/pipeline/hogfunctions/PipelineHogFunctionConfiguration.tsx b/frontend/src/scenes/pipeline/hogfunctions/PipelineHogFunctionConfiguration.tsx new file mode 100644 index 00000000000000..caeda41d63eef4 --- /dev/null +++ b/frontend/src/scenes/pipeline/hogfunctions/PipelineHogFunctionConfiguration.tsx @@ -0,0 +1,247 @@ +import { LemonButton, LemonInput, LemonSwitch, LemonTextArea, SpinnerOverlay } from '@posthog/lemon-ui' +import { useActions, useValues } from 'kea' +import { Form } from 'kea-forms' +import { NotFound } from 'lib/components/NotFound' +import { PageHeader } from 'lib/components/PageHeader' +import { TaxonomicFilterGroupType } from 'lib/components/TaxonomicFilter/types' +import { TestAccountFilterSwitch } from 'lib/components/TestAccountFiltersSwitch' +import { useFeatureFlag } from 'lib/hooks/useFeatureFlag' +import { LemonField } from 'lib/lemon-ui/LemonField' +import { HogQueryEditor } from 'scenes/debug/HogDebug' +import { ActionFilter } from 'scenes/insights/filters/ActionFilter/ActionFilter' +import { MathAvailability } from 'scenes/insights/filters/ActionFilter/ActionFilterRow/ActionFilterRow' + +import { groupsModel } from '~/models/groupsModel' +import { NodeKind } from '~/queries/schema' +import { EntityTypes } from '~/types' + +import { HogFunctionInput } from './HogFunctionInputs' +import { HogFunctionInputsEditor } from './HogFunctionInputsEditor' +import { pipelineHogFunctionConfigurationLogic } from './pipelineHogFunctionConfigurationLogic' + +export function PipelineHogFunctionConfiguration({ + templateId, + id, +}: { + templateId?: string + id?: string +}): JSX.Element { + const logicProps = { templateId, id } + const logic = pipelineHogFunctionConfigurationLogic(logicProps) + const { isConfigurationSubmitting, configurationChanged, showSource, configuration, loading, loaded } = + useValues(logic) + const { submitConfiguration, resetForm, setShowSource } = useActions(logic) + + const hogFunctionsEnabled = !!useFeatureFlag('HOG_FUNCTIONS') + const { groupsTaxonomicTypes } = useValues(groupsModel) + + if (loading && !loaded) { + return + } + + if (!loaded) { + return + } + + if (!hogFunctionsEnabled && !id) { + return ( +
+
+

Feature not enabled

+

Hog functions are not enabled for you yet. If you think they should be, contact support.

+
+
+ ) + } + const buttons = ( + <> + resetForm()} + disabledReason={ + !configurationChanged ? 'No changes' : isConfigurationSubmitting ? 'Saving in progress…' : undefined + } + > + Clear changes + + + {templateId ? 'Create' : 'Save'} + + + ) + + return ( +
+ +
+
+
+
+
+ 🦔 +
+ Hog Function +
+ + + {({ value, onChange }) => ( + onChange(!value)} + checked={value} + disabled={loading} + bordered + /> + )} + +
+ + + + + + +
+ +
+ + {({ value, onChange }) => ( + <> + onChange({ ...value, filter_test_accounts: val })} + fullWidth + /> + { + onChange({ + ...payload, + filter_test_accounts: value?.filter_test_accounts, + }) + }} + typeKey="plugin-filters" + mathAvailability={MathAvailability.None} + hideRename + hideDuplicate + showNestedArrow={false} + actionsTaxonomicGroupTypes={[ + TaxonomicFilterGroupType.Events, + TaxonomicFilterGroupType.Actions, + ]} + propertiesTaxonomicGroupTypes={[ + TaxonomicFilterGroupType.EventProperties, + TaxonomicFilterGroupType.EventFeatureFlags, + TaxonomicFilterGroupType.Elements, + TaxonomicFilterGroupType.PersonProperties, + TaxonomicFilterGroupType.HogQLExpression, + ...groupsTaxonomicTypes, + ]} + propertyFiltersPopover + addFilterDefaultOptions={{ + id: '$pageview', + name: '$pageview', + type: EntityTypes.EVENTS, + }} + buttonCopy="Add event filter" + /> + + )} + + +

+ This destination will be triggered if any of the above filters match. +

+
+
+ +
+
+
+

Function configuration

+ + setShowSource(!showSource)}> + {showSource ? 'Hide source code' : 'Show source code'} + +
+ + {showSource ? ( +
+ + + + + + {({ value, onChange }) => ( + // TODO: Fix this so we don't have to click "update and run" + { + onChange(q.code) + }} + /> + )} + +
+ ) : ( +
+ {configuration?.inputs_schema?.length ? ( + configuration?.inputs_schema.map((schema) => { + return ( +
+ + {({ value, onChange }) => { + return ( + onChange({ value: val })} + /> + ) + }} + +
+ ) + }) + ) : ( + + This function does not require any input variables. + + )} +
+ )} +
+
{buttons}
+
+
+
+
+ ) +} diff --git a/frontend/src/scenes/pipeline/hogfunctions/pipelineHogFunctionConfigurationLogic.tsx b/frontend/src/scenes/pipeline/hogfunctions/pipelineHogFunctionConfigurationLogic.tsx new file mode 100644 index 00000000000000..8c90cb6e93334f --- /dev/null +++ b/frontend/src/scenes/pipeline/hogfunctions/pipelineHogFunctionConfigurationLogic.tsx @@ -0,0 +1,250 @@ +import { actions, afterMount, kea, key, listeners, path, props, reducers, selectors } from 'kea' +import { forms } from 'kea-forms' +import { loaders } from 'kea-loaders' +import { router } from 'kea-router' +import { subscriptions } from 'kea-subscriptions' +import api from 'lib/api' +import { urls } from 'scenes/urls' + +import { + FilterType, + HogFunctionTemplateType, + HogFunctionType, + PipelineNodeTab, + PipelineStage, + PluginConfigFilters, + PluginConfigTypeNew, +} from '~/types' + +import type { pipelineHogFunctionConfigurationLogicType } from './pipelineHogFunctionConfigurationLogicType' +import { HOG_FUNCTION_TEMPLATES } from './templates/hog-templates' + +export interface PipelineHogFunctionConfigurationLogicProps { + templateId?: string + id?: string +} + +function sanitizeFilters(filters?: FilterType): PluginConfigTypeNew['filters'] { + if (!filters) { + return null + } + const sanitized: PluginConfigFilters = {} + + if (filters.events) { + sanitized.events = filters.events.map((f) => ({ + id: f.id, + type: 'events', + name: f.name, + order: f.order, + properties: f.properties, + })) + } + + if (filters.actions) { + sanitized.actions = filters.actions.map((f) => ({ + id: f.id, + type: 'actions', + name: f.name, + order: f.order, + properties: f.properties, + })) + } + + if (filters.filter_test_accounts) { + sanitized.filter_test_accounts = filters.filter_test_accounts + } + + return Object.keys(sanitized).length > 0 ? sanitized : undefined +} + +// Should likely be somewhat similar to pipelineBatchExportConfigurationLogic +export const pipelineHogFunctionConfigurationLogic = kea([ + props({} as PipelineHogFunctionConfigurationLogicProps), + key(({ id, templateId }: PipelineHogFunctionConfigurationLogicProps) => { + return id ?? templateId ?? 'new' + }), + path((id) => ['scenes', 'pipeline', 'pipelineHogFunctionConfigurationLogic', id]), + actions({ + setShowSource: (showSource: boolean) => ({ showSource }), + resetForm: true, + }), + reducers({ + showSource: [ + false, + { + setShowSource: (_, { showSource }) => showSource, + }, + ], + }), + loaders(({ props }) => ({ + template: [ + null as HogFunctionTemplateType | null, + { + loadTemplate: async () => { + if (!props.templateId) { + return null + } + const res = HOG_FUNCTION_TEMPLATES.find((template) => template.id === props.templateId) + + if (!res) { + throw new Error('Template not found') + } + return res + }, + }, + ], + + hogFunction: [ + null as HogFunctionType | null, + { + loadHogFunction: async () => { + if (!props.id) { + return null + } + + return await api.hogFunctions.get(props.id) + }, + }, + ], + })), + forms(({ values, props, actions }) => ({ + configuration: { + defaults: {} as HogFunctionType, + alwaysShowErrors: true, + errors: (data) => { + return { + name: !data.name ? 
'Name is required' : null, + ...values.inputFormErrors, + } + }, + submit: async (data) => { + const sanitizedInputs = {} + + data.inputs_schema?.forEach((input) => { + if (input.type === 'json' && typeof data.inputs[input.key].value === 'string') { + try { + sanitizedInputs[input.key] = { + value: JSON.parse(data.inputs[input.key].value), + } + } catch (e) { + // Ignore + } + } else { + sanitizedInputs[input.key] = { + value: data.inputs[input.key].value, + } + } + }) + + const payload = { + ...data, + filters: data.filters ? sanitizeFilters(data.filters) : null, + inputs: sanitizedInputs, + } + + try { + if (!props.id) { + return await api.hogFunctions.create(payload) + } + return await api.hogFunctions.update(props.id, payload) + } catch (e) { + const maybeValidationError = (e as any).data + if (maybeValidationError?.type === 'validation_error') { + if (maybeValidationError.attr.includes('inputs__')) { + actions.setConfigurationManualErrors({ + inputs: { + [maybeValidationError.attr.split('__')[1]]: maybeValidationError.detail, + }, + }) + } else { + actions.setConfigurationManualErrors({ + [maybeValidationError.attr]: maybeValidationError.detail, + }) + } + } + throw e + } + }, + }, + })), + selectors(() => ({ + loading: [ + (s) => [s.hogFunctionLoading, s.templateLoading], + (hogFunctionLoading, templateLoading) => hogFunctionLoading || templateLoading, + ], + loaded: [(s) => [s.hogFunction, s.template], (hogFunction, template) => !!hogFunction || !!template], + + inputFormErrors: [ + (s) => [s.configuration], + (configuration) => { + const inputs = configuration.inputs ?? {} + const inputErrors = {} + + configuration.inputs_schema?.forEach((input) => { + if (input.required && !inputs[input.key]) { + inputErrors[input.key] = 'This field is required' + } + + if (input.type === 'json' && typeof inputs[input.key] === 'string') { + try { + JSON.parse(inputs[input.key].value) + } catch (e) { + inputErrors[input.key] = 'Invalid JSON' + } + } + }) + + return Object.keys(inputErrors).length > 0 + ? { + inputs: inputErrors, + } + : null + }, + ], + })), + + listeners(({ actions, values, cache, props }) => ({ + loadTemplateSuccess: () => actions.resetForm(), + loadHogFunctionSuccess: () => actions.resetForm(), + resetForm: () => { + const savedValue = values.hogFunction ?? values.template + actions.resetConfiguration({ + ...savedValue, + inputs: (savedValue as any)?.inputs ?? 
{}, + ...(cache.configFromUrl || {}), + }) + }, + + submitConfigurationSuccess: ({ configuration }) => { + if (!props.id) { + router.actions.replace( + urls.pipelineNode( + PipelineStage.Destination, + `hog-${configuration.id}`, + PipelineNodeTab.Configuration + ) + ) + } + }, + })), + afterMount(({ props, actions, cache }) => { + if (props.templateId) { + cache.configFromUrl = router.values.hashParams.configuration + actions.loadTemplate() // comes with plugin info + } else if (props.id) { + actions.loadHogFunction() + } + }), + + subscriptions(({ props, cache }) => ({ + configuration: (configuration) => { + if (props.templateId) { + // Sync state to the URL bar if new + cache.ignoreUrlChange = true + router.actions.replace(router.values.location.pathname, undefined, { + configuration, + }) + } + }, + })), +]) diff --git a/frontend/src/scenes/pipeline/hogfunctions/templates/hog-templates.tsx b/frontend/src/scenes/pipeline/hogfunctions/templates/hog-templates.tsx new file mode 100644 index 00000000000000..294159998ab2b7 --- /dev/null +++ b/frontend/src/scenes/pipeline/hogfunctions/templates/hog-templates.tsx @@ -0,0 +1,58 @@ +import { HogFunctionTemplateType } from '~/types' + +export const HOG_FUNCTION_TEMPLATES: HogFunctionTemplateType[] = [ + { + id: 'template-webhook', + name: 'HogHook', + description: 'Sends a webhook templated by the incoming event data', + hog: "fetch(inputs.url, {\n 'headers': inputs.headers,\n 'body': inputs.payload,\n 'method': inputs.method\n});", + inputs_schema: [ + { + key: 'url', + type: 'string', + label: 'Webhook URL', + secret: false, + required: true, + }, + { + key: 'method', + type: 'choice', + label: 'Method', + secret: false, + choices: [ + { + label: 'POST', + value: 'POST', + }, + { + label: 'PUT', + value: 'PUT', + }, + { + label: 'GET', + value: 'GET', + }, + { + label: 'DELETE', + value: 'DELETE', + }, + ], + required: false, + }, + { + key: 'payload', + type: 'json', + label: 'JSON Payload', + secret: false, + required: false, + }, + { + key: 'headers', + type: 'dictionary', + label: 'Headers', + secret: false, + required: false, + }, + ], + }, +] diff --git a/frontend/src/scenes/pipeline/pipelineNodeLogsLogic.tsx b/frontend/src/scenes/pipeline/pipelineNodeLogsLogic.tsx index 053ba517c17d3b..2488b6777d9919 100644 --- a/frontend/src/scenes/pipeline/pipelineNodeLogsLogic.tsx +++ b/frontend/src/scenes/pipeline/pipelineNodeLogsLogic.tsx @@ -1,4 +1,5 @@ -import { LemonTableColumns } from '@posthog/lemon-ui' +import { TZLabel } from '@posthog/apps-common' +import { LemonTableColumns, Link } from '@posthog/lemon-ui' import { actions, connect, events, kea, key, listeners, path, props, reducers, selectors } from 'kea' import { loaders } from 'kea-loaders' import { LOGS_PORTION_LIMIT } from 'lib/constants' @@ -28,13 +29,14 @@ export const pipelineNodeLogsLogic = kea([ key(({ id }) => id), path((key) => ['scenes', 'pipeline', 'pipelineNodeLogsLogic', key]), connect((props: PipelineNodeLogicProps) => ({ - values: [teamLogic(), ['currentTeamId'], pipelineNodeLogic(props), ['nodeBackend']], + values: [teamLogic(), ['currentTeamId'], pipelineNodeLogic(props), ['node']], })), actions({ setSelectedLogLevels: (levels: PipelineLogLevel[]) => ({ levels, }), setSearchTerm: (searchTerm: string) => ({ searchTerm }), + setInstanceId: (instanceId: string) => ({ instanceId }), clearBackgroundLogs: true, markLogsEnd: true, }), @@ -44,15 +46,24 @@ export const pipelineNodeLogsLogic = kea([ { loadLogs: async () => { let results: LogEntry[] - if (values.nodeBackend === 
PipelineBackend.BatchExport) { + if (values.node.backend === PipelineBackend.BatchExport) { results = await api.batchExportLogs.search( - id as string, + values.node.id, values.searchTerm, values.selectedLogLevels ) + } else if (values.node.backend === PipelineBackend.HogFunction) { + const res = await api.hogFunctions.searchLogs(values.node.id, { + search: values.searchTerm, + levels: values.selectedLogLevels, + limit: LOGS_PORTION_LIMIT, + instance_id: values.instanceId, + }) + + results = res.results } else { results = await api.pluginLogs.search( - id as number, + values.node.id, values.searchTerm, logLevelsToTypeFilters(values.selectedLogLevels) ) @@ -66,13 +77,23 @@ export const pipelineNodeLogsLogic = kea([ }, loadMoreLogs: async () => { let results: LogEntry[] - if (values.nodeBackend === PipelineBackend.BatchExport) { + if (values.node.backend === PipelineBackend.BatchExport) { results = await api.batchExportLogs.search( id as string, values.searchTerm, values.selectedLogLevels, values.trailingEntry as BatchExportLogEntry | null ) + } else if (values.node.backend === PipelineBackend.HogFunction) { + const res = await api.hogFunctions.searchLogs(values.node.id, { + search: values.searchTerm, + levels: values.selectedLogLevels, + limit: LOGS_PORTION_LIMIT, + before: values.trailingEntry?.timestamp, + instance_id: values.instanceId, + }) + + results = res.results } else { results = await api.pluginLogs.search( id as number, @@ -105,7 +126,7 @@ export const pipelineNodeLogsLogic = kea([ } let results: LogEntry[] - if (values.nodeBackend === PipelineBackend.BatchExport) { + if (values.node.backend === PipelineBackend.BatchExport) { results = await api.batchExportLogs.search( id as string, values.searchTerm, @@ -113,6 +134,16 @@ export const pipelineNodeLogsLogic = kea([ null, values.leadingEntry as BatchExportLogEntry | null ) + } else if (values.node.backend === PipelineBackend.HogFunction) { + const res = await api.hogFunctions.searchLogs(values.node.id, { + search: values.searchTerm, + levels: values.selectedLogLevels, + limit: LOGS_PORTION_LIMIT, + after: values.leadingEntry?.timestamp, + instance_id: values.instanceId, + }) + + results = res.results } else { results = await api.pluginLogs.search( id as number, @@ -147,6 +178,12 @@ export const pipelineNodeLogsLogic = kea([ setSearchTerm: (_, { searchTerm }) => searchTerm, }, ], + instanceId: [ + '', + { + setInstanceId: (_, { instanceId }) => instanceId, + }, + ], isThereMoreToLoad: [ true, { @@ -155,7 +192,7 @@ export const pipelineNodeLogsLogic = kea([ }, ], }), - selectors({ + selectors(({ actions }) => ({ leadingEntry: [ (s) => [s.logs, s.backgroundLogs], (logs: LogEntry[], backgroundLogs: LogEntry[]): LogEntry | null => { @@ -181,26 +218,76 @@ export const pipelineNodeLogsLogic = kea([ }, ], columns: [ - (s) => [s.nodeBackend], - (nodeBackend): LemonTableColumns => { + (s) => [s.node], + (node): LemonTableColumns => { return [ { title: 'Timestamp', key: 'timestamp', dataIndex: 'timestamp', sorter: (a: LogEntry, b: LogEntry) => dayjs(a.timestamp).unix() - dayjs(b.timestamp).unix(), - render: (timestamp: string) => dayjs(timestamp).format('YYYY-MM-DD HH:mm:ss.SSS UTC'), + render: (timestamp: string) => , + width: 0, }, { - title: nodeBackend === PipelineBackend.BatchExport ? 'Run Id' : 'Source', - dataIndex: nodeBackend === PipelineBackend.BatchExport ? 'run_id' : 'source', - key: nodeBackend === PipelineBackend.BatchExport ? 'run_id' : 'source', + width: 0, + title: + node.backend == PipelineBackend.HogFunction + ? 
'Invocation' + : node.backend == PipelineBackend.BatchExport + ? 'Run Id' + : 'Source', + dataIndex: + node.backend == PipelineBackend.HogFunction + ? 'instance_id' + : node.backend == PipelineBackend.BatchExport + ? 'run_id' + : 'source', + key: + node.backend == PipelineBackend.HogFunction + ? 'instance_id' + : node.backend == PipelineBackend.BatchExport + ? 'run_id' + : 'source', + + render: (instanceId: string) => ( + + {node.backend === PipelineBackend.HogFunction ? ( + { + actions.setInstanceId(instanceId) + }} + > + {instanceId} + + ) : ( + instanceId + )} + + ), }, { + width: 100, title: 'Level', - key: nodeBackend === PipelineBackend.BatchExport ? 'level' : 'type', - dataIndex: nodeBackend === PipelineBackend.BatchExport ? 'level' : 'type', - render: nodeBackend === PipelineBackend.BatchExport ? LogLevelDisplay : LogTypeDisplay, + key: + node.backend == PipelineBackend.HogFunction + ? 'level' + : node.backend == PipelineBackend.BatchExport + ? 'level' + : 'type', + dataIndex: + node.backend == PipelineBackend.HogFunction + ? 'level' + : node.backend == PipelineBackend.BatchExport + ? 'level' + : 'type', + render: + node.backend == PipelineBackend.HogFunction + ? LogLevelDisplay + : node.backend == PipelineBackend.BatchExport + ? LogLevelDisplay + : LogTypeDisplay, }, { title: 'Message', @@ -211,7 +298,7 @@ export const pipelineNodeLogsLogic = kea([ ] as LemonTableColumns }, ], - }), + })), listeners(({ actions }) => ({ setSelectedLogLevels: () => { actions.loadLogs() @@ -222,6 +309,9 @@ export const pipelineNodeLogsLogic = kea([ } actions.loadLogs() }, + setInstanceId: async () => { + actions.loadLogs() + }, })), events(({ actions, cache }) => ({ afterMount: () => { diff --git a/frontend/src/scenes/session-recordings/filters/RecordingsUniversalFilters.tsx b/frontend/src/scenes/session-recordings/filters/RecordingsUniversalFilters.tsx new file mode 100644 index 00000000000000..2625549c1b53d0 --- /dev/null +++ b/frontend/src/scenes/session-recordings/filters/RecordingsUniversalFilters.tsx @@ -0,0 +1,138 @@ +import { useActions, useMountedLogic, useValues } from 'kea' +import { DateFilter } from 'lib/components/DateFilter/DateFilter' +import { TaxonomicFilterGroupType } from 'lib/components/TaxonomicFilter/types' +import UniversalFilters from 'lib/components/UniversalFilters/UniversalFilters' +import { universalFiltersLogic } from 'lib/components/UniversalFilters/universalFiltersLogic' +import { isUniversalGroupFilterLike } from 'lib/components/UniversalFilters/utils' +import { TestAccountFilter } from 'scenes/insights/filters/TestAccountFilter' + +import { actionsModel } from '~/models/actionsModel' +import { cohortsModel } from '~/models/cohortsModel' +import { AndOrFilterSelect } from '~/queries/nodes/InsightViz/PropertyGroupFilters/AndOrFilterSelect' + +import { sessionRecordingsPlaylistLogic } from '../playlist/sessionRecordingsPlaylistLogic' +import { DurationFilter } from './DurationFilter' + +export const RecordingsUniversalFilters = (): JSX.Element => { + useMountedLogic(cohortsModel) + useMountedLogic(actionsModel) + const { universalFilters } = useValues(sessionRecordingsPlaylistLogic) + const { setUniversalFilters } = useActions(sessionRecordingsPlaylistLogic) + + const durationFilter = universalFilters.duration[0] + + return ( +
+
+
+ { + setUniversalFilters({ + ...universalFilters, + date_from: changedDateFrom, + date_to: changedDateTo, + }) + }} + dateOptions={[ + { key: 'Custom', values: [] }, + { key: 'Last 24 hours', values: ['-24h'] }, + { key: 'Last 3 days', values: ['-3d'] }, + { key: 'Last 7 days', values: ['-7d'] }, + { key: 'Last 30 days', values: ['-30d'] }, + { key: 'All time', values: ['-90d'] }, + ]} + dropdownPlacement="bottom-start" + size="small" + /> + { + setUniversalFilters({ + duration: [{ ...newRecordingDurationFilter, key: newDurationType }], + }) + }} + recordingDurationFilter={durationFilter} + durationTypeFilter={durationFilter.key} + pageKey="session-recordings" + /> + + setUniversalFilters({ + ...universalFilters, + filter_test_accounts: testFilters.filter_test_accounts, + }) + } + /> +
+
+ { + setUniversalFilters({ + ...universalFilters, + filter_group: { + type: type, + values: universalFilters.filter_group.values, + }, + }) + }} + disabledReason="'Or' filtering is not supported yet" + topLevelFilter={true} + suffix={['filter', 'filters']} + /> +
+
+
+ { + setUniversalFilters({ + ...universalFilters, + filter_group: filterGroup, + }) + }} + > + + +
+
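For orientation, here is a minimal sketch of the filters object that this component reads and writes through setUniversalFilters. The field names mirror the JSX above; the concrete values and the duration filter shape are assumptions for illustration, since the real type is defined in the playlist logic and not shown in this diff.

```ts
// Illustrative sketch only, not part of this PR. Field names mirror what
// RecordingsUniversalFilters accesses above; values are invented for the example.
const exampleUniversalFilters = {
    date_from: '-7d', // fed into the DateFilter
    date_to: null,
    filter_test_accounts: false, // toggled by TestAccountFilter
    duration: [
        // durationFilter = universalFilters.duration[0]
        { type: 'recording', key: 'duration', operator: 'gt', value: 60 },
    ],
    filter_group: {
        // consumed by UniversalFilters and the AndOrFilterSelect
        type: 'AND',
        values: [],
    },
}

// Usage would then look like:
// setUniversalFilters({ ...exampleUniversalFilters, date_from: '-24h' })
```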
+ ) +} + +const RecordingsUniversalFilterGroup = (): JSX.Element => { + const { filterGroup } = useValues(universalFiltersLogic) + const { replaceGroupValue, removeGroupValue } = useActions(universalFiltersLogic) + + return ( + <> + {filterGroup.values.map((filterOrGroup, index) => { + return isUniversalGroupFilterLike(filterOrGroup) ? ( + + + + + ) : ( + removeGroupValue(index)} + onChange={(value) => replaceGroupValue(index, value)} + /> + ) + })} + + ) +} diff --git a/frontend/src/scenes/session-recordings/filters/ReplayTaxonomicFilters.tsx b/frontend/src/scenes/session-recordings/filters/ReplayTaxonomicFilters.tsx new file mode 100644 index 00000000000000..345f66b1c90b6a --- /dev/null +++ b/frontend/src/scenes/session-recordings/filters/ReplayTaxonomicFilters.tsx @@ -0,0 +1,109 @@ +import { IconTrash } from '@posthog/icons' +import { LemonButton, Popover } from '@posthog/lemon-ui' +import { useActions, useValues } from 'kea' +import { PropertyKeyInfo } from 'lib/components/PropertyKeyInfo' +import { TaxonomicFilter } from 'lib/components/TaxonomicFilter/TaxonomicFilter' +import { TaxonomicFilterGroupType, TaxonomicFilterValue } from 'lib/components/TaxonomicFilter/types' +import { universalFiltersLogic } from 'lib/components/UniversalFilters/universalFiltersLogic' +import { useState } from 'react' + +import { PropertyFilterType } from '~/types' + +import { playerSettingsLogic } from '../player/playerSettingsLogic' + +export interface ReplayTaxonomicFiltersProps { + onChange: (value: TaxonomicFilterValue, item?: any) => void +} + +export function ReplayTaxonomicFilters({ onChange }: ReplayTaxonomicFiltersProps): JSX.Element { + const { + filterGroup: { values: filters }, + } = useValues(universalFiltersLogic) + + const hasConsoleLogLevelFilter = filters.find( + (f) => f.type === PropertyFilterType.Recording && f.key === 'console_log_level' + ) + const hasConsoleLogQueryFilter = filters.find( + (f) => f.type === PropertyFilterType.Recording && f.key === 'console_log_query' + ) + + return ( +
+
+
Session properties
+
    + onChange('console_log_level', {})} + disabledReason={hasConsoleLogLevelFilter ? 'Log level filter already added' : undefined} + > + Console log level + + onChange('console_log_query', {})} + disabledReason={hasConsoleLogQueryFilter ? 'Log text filter already added' : undefined} + > + Console log text + +
+
+ + +
+ ) +} + +const PersonProperties = ({ onChange }: { onChange: ReplayTaxonomicFiltersProps['onChange'] }): JSX.Element => { + const { quickFilterProperties: properties } = useValues(playerSettingsLogic) + const { setQuickFilterProperties } = useActions(playerSettingsLogic) + + const [showPropertySelector, setShowPropertySelector] = useState(false) + + return ( +
+
Person properties
+
    + {properties.map((property) => ( + { + const newProperties = properties.filter((p) => p != property) + setQuickFilterProperties(newProperties) + }, + icon: , + }} + onClick={() => onChange(property, { propertyFilterType: PropertyFilterType.Person })} + > + + + ))} + setShowPropertySelector(false)} + placement="right-start" + overlay={ + { + properties.push(value as string) + setQuickFilterProperties([...properties]) + setShowPropertySelector(false) + }} + taxonomicGroupTypes={[TaxonomicFilterGroupType.PersonProperties]} + excludedProperties={{ [TaxonomicFilterGroupType.PersonProperties]: properties }} + /> + } + > + setShowPropertySelector(!showPropertySelector)} fullWidth> + Add property + + +
+
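As a quick illustration of the quick-filter list handled above: properties are removed via the trash side action and appended when a property is picked in the TaxonomicFilter popover. The setter and property names below are stand-ins; the real action lives in playerSettingsLogic.

```ts
// Illustrative sketch only; approximates the quick-filter handling above.
// The property names and this local setter are assumptions for the example.
let quickFilterProperties: string[] = ['$browser', '$geoip_country_name']

const setQuickFilterProperties = (next: string[]): void => {
    quickFilterProperties = next
}

// Removing a property (the trash side action on a row):
setQuickFilterProperties(quickFilterProperties.filter((p) => p !== '$browser'))

// Adding a property chosen in the TaxonomicFilter popover:
setQuickFilterProperties([...quickFilterProperties, 'email'])
```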
+ ) } diff --git a/frontend/src/scenes/settings/SettingsMap.tsx index 62a94eaed4342f..ee7c6c3b363b52 100644 --- a/frontend/src/scenes/settings/SettingsMap.tsx +++ b/frontend/src/scenes/settings/SettingsMap.tsx @@ -126,8 +126,9 @@ export const SettingsMap: SettingSection[] = [ }, { id: 'persons-on-events', - title: 'Event person filtering behavior', + title: 'Person properties mode', component: , + flag: '!SETTINGS_PERSONS_ON_EVENTS_HIDDEN', // Setting hidden for Cloud orgs created since June 2024 }, { id: 'correlation-analysis', diff --git a/frontend/src/scenes/settings/settingsLogic.ts index 248fd4e627425c..f00d8e523aab51 100644 --- a/frontend/src/scenes/settings/settingsLogic.ts +++ b/frontend/src/scenes/settings/settingsLogic.ts @@ -62,7 +62,14 @@ export const settingsLogic = kea([ sections: [ (s) => [s.featureFlags], (featureFlags): SettingSection[] => { - return SettingsMap.filter((x) => (x.flag ? featureFlags[FEATURE_FLAGS[x.flag]] : true)) + return SettingsMap.filter((x) => { + const isFlagConditionMet = !x.flag + ? true // No flag condition + : x.flag.startsWith('!') + ? !featureFlags[FEATURE_FLAGS[x.flag.slice(1)]] // Negated flag condition (!-prefixed) + : featureFlags[FEATURE_FLAGS[x.flag]] // Regular flag condition + return isFlagConditionMet + }) }, ], selectedSection: [ @@ -96,14 +103,17 @@ export const settingsLogic = kea([ } return settings.filter((x) => { + const isFlagConditionMet = !x.flag + ? true // No flag condition + : x.flag.startsWith('!') + ? !featureFlags[FEATURE_FLAGS[x.flag.slice(1)]] // Negated flag condition (!-prefixed) + : featureFlags[FEATURE_FLAGS[x.flag]] // Regular flag condition if (x.flag && x.features) { - return ( - x.features.some((feat) => hasAvailableFeature(feat)) || featureFlags[FEATURE_FLAGS[x.flag]] - ) + return x.features.some((feat) => hasAvailableFeature(feat)) || isFlagConditionMet } else if (x.features) { return x.features.some((feat) => hasAvailableFeature(feat)) } else if (x.flag) { - return featureFlags[FEATURE_FLAGS[x.flag]] + return isFlagConditionMet } return true diff --git a/frontend/src/scenes/settings/types.ts index bc44d6808f276e..d63d797ef536a0 100644 --- a/frontend/src/scenes/settings/types.ts +++ b/frontend/src/scenes/settings/types.ts @@ -83,12 +83,18 @@ export type SettingId = | 'persons-join-mode' | 'bounce-rate-page-view-mode' +type FeatureFlagKey = keyof typeof FEATURE_FLAGS + export type Setting = { id: SettingId title: string description?: JSX.Element | string component: JSX.Element - flag?: keyof typeof FEATURE_FLAGS + /** + * Feature flag to gate the setting being shown. + * If prefixed with !, the condition is inverted - the setting will only be shown if the flag is false. + */ + flag?: FeatureFlagKey | `!${FeatureFlagKey}` features?: AvailableFeature[] } @@ -97,6 +103,10 @@ export type SettingSection = { title: string level: SettingLevelId settings: Setting[] - flag?: keyof typeof FEATURE_FLAGS + /** + * Feature flag to gate the section being shown. + * If prefixed with !, the condition is inverted - the section will only be shown if the flag is false.
+ */ + flag?: FeatureFlagKey | `!${FeatureFlagKey}` minimumAccessLevel?: EitherMembershipLevel } diff --git a/frontend/src/scenes/surveys/QuestionBranchingInput.tsx b/frontend/src/scenes/surveys/QuestionBranchingInput.tsx new file mode 100644 index 00000000000000..96c6ea55912d6b --- /dev/null +++ b/frontend/src/scenes/surveys/QuestionBranchingInput.tsx @@ -0,0 +1,68 @@ +import './EditSurvey.scss' + +import { LemonSelect } from '@posthog/lemon-ui' +import { useActions, useValues } from 'kea' +import { LemonField } from 'lib/lemon-ui/LemonField' + +import { MultipleSurveyQuestion, RatingSurveyQuestion, SurveyQuestionBranchingType } from '~/types' + +import { surveyLogic } from './surveyLogic' + +export function QuestionBranchingInput({ + questionIndex, + question, +}: { + questionIndex: number + question: RatingSurveyQuestion | MultipleSurveyQuestion +}): JSX.Element { + const { survey, getBranchingDropdownValue } = useValues(surveyLogic) + const { setQuestionBranching } = useActions(surveyLogic) + + const availableNextQuestions = survey.questions + .map((question, questionIndex) => ({ + ...question, + questionIndex, + })) + .filter((_, idx) => questionIndex !== idx) + const branchingDropdownValue = getBranchingDropdownValue(questionIndex, question) + + return ( + <> + + setQuestionBranching(questionIndex, value)} + options={[ + ...(questionIndex < survey.questions.length - 1 + ? [ + { + label: 'Next question', + value: SurveyQuestionBranchingType.NextQuestion, + }, + ] + : []), + { + label: 'Confirmation message', + value: SurveyQuestionBranchingType.ConfirmationMessage, + }, + { + label: 'Specific question based on answer', + value: SurveyQuestionBranchingType.ResponseBased, + }, + ...availableNextQuestions.map((question) => ({ + label: `${question.questionIndex + 1}. ${question.question}`, + value: `${SurveyQuestionBranchingType.SpecificQuestion}:${question.questionIndex}`, + })), + ]} + /> + + {branchingDropdownValue === SurveyQuestionBranchingType.ResponseBased && ( +
+ TODO: dropdowns for the response-based branching +
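The dropdown above encodes a jump to a specific question as a composite value of the branching type plus the target question index. A small sketch of that convention follows; the literal 'specific_question' is an assumption standing in for SurveyQuestionBranchingType.SpecificQuestion, which is defined in ~/types and not shown here.

```ts
// Illustrative sketch only. 'specific_question' stands in for
// SurveyQuestionBranchingType.SpecificQuestion from ~/types.
const SPECIFIC_QUESTION = 'specific_question'

// How the dropdown option values above are built:
const toDropdownValue = (targetQuestionIndex: number): string => `${SPECIFIC_QUESTION}:${targetQuestionIndex}`

// How a selected value could be taken apart again when persisting branching:
const fromDropdownValue = (value: string): { type: string; index?: number } => {
    const [type, index] = value.split(':')
    return index === undefined ? { type } : { type, index: Number(index) }
}

// toDropdownValue(2) === 'specific_question:2'
// fromDropdownValue('specific_question:2') returns { type: 'specific_question', index: 2 }
```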
+ )} + + ) +} diff --git a/hogvm/typescript/src/index.ts b/hogvm/typescript/src/index.ts new file mode 100644 index 00000000000000..20f547aef5f9fd --- /dev/null +++ b/hogvm/typescript/src/index.ts @@ -0,0 +1,3 @@ +export * from './execute' +export * from './operation' +export * from './utils' diff --git a/mypy-baseline.txt b/mypy-baseline.txt index d3e4f2d3fc6059..ec15c786c8f5ed 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -26,22 +26,6 @@ posthog/temporal/data_imports/pipelines/rest_source/config_setup.py:0: error: Un posthog/temporal/data_imports/pipelines/rest_source/config_setup.py:0: error: Unpacked dict entry 1 has incompatible type "dict[str, Any] | None"; expected "SupportsKeysAndGetItem[str, Any]" [dict-item] posthog/temporal/data_imports/pipelines/rest_source/config_setup.py:0: error: Unpacked dict entry 0 has incompatible type "dict[str, Any] | None"; expected "SupportsKeysAndGetItem[str, ResolveParamConfig | IncrementalParamConfig | Any]" [dict-item] posthog/temporal/data_imports/pipelines/rest_source/config_setup.py:0: error: Unpacked dict entry 1 has incompatible type "dict[str, ResolveParamConfig | IncrementalParamConfig | Any] | None"; expected "SupportsKeysAndGetItem[str, ResolveParamConfig | IncrementalParamConfig | Any]" [dict-item] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Not all union combinations were tried because there are too many unions [misc] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 2 to "source" has incompatible type "str | None"; expected "str" [arg-type] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 3 to "source" has incompatible type "str | None"; expected "str" [arg-type] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 4 to "source" has incompatible type "int | None"; expected "int" [arg-type] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 6 to "source" has incompatible type "Schema | None"; expected "Schema" [arg-type] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 7 to "source" has incompatible type "Literal['evolve', 'discard_value', 'freeze', 'discard_row'] | TSchemaContractDict | None"; expected "Literal['evolve', 'discard_value', 'freeze', 'discard_row'] | TSchemaContractDict" [arg-type] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 8 to "source" has incompatible type "type[BaseConfiguration] | None"; expected "type[BaseConfiguration]" [arg-type] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 1 to "build_resource_dependency_graph" has incompatible type "EndpointResourceBase | None"; expected "EndpointResourceBase" [arg-type] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Need type annotation for "resources" (hint: "resources: dict[, ] = ...") [var-annotated] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Incompatible types in assignment (expression has type "ResolvedParam | None", variable has type "ResolvedParam") [assignment] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Incompatible types in assignment (expression has type "list[str] | None", variable has type "list[str]") [assignment] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 1 to "setup_incremental_object" has incompatible type "dict[str, ResolveParamConfig | 
IncrementalParamConfig | Any] | None"; expected "dict[str, Any]" [arg-type] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Statement is unreachable [unreachable] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 1 to "exclude_keys" has incompatible type "dict[str, ResolveParamConfig | IncrementalParamConfig | Any] | None"; expected "Mapping[str, Any]" [arg-type] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Incompatible default for argument "incremental_param" (default has type "IncrementalParam | None", argument has type "IncrementalParam") [assignment] -posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument "module" to "SourceInfo" has incompatible type Module | None; expected Module [arg-type] posthog/hogql/database/schema/numbers.py:0: error: Incompatible types in assignment (expression has type "dict[str, IntegerDatabaseField]", variable has type "dict[str, FieldOrTable]") [assignment] posthog/hogql/database/schema/numbers.py:0: note: "Dict" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql/database/schema/numbers.py:0: note: Consider using "Mapping" instead, which is covariant in the value type @@ -625,6 +609,21 @@ posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 to "ensure_pendulum_datetime" has incompatible type "DateTime | Date | datetime | date | str | float | int | None"; expected "DateTime | Date | datetime | date | str | float | int" [arg-type] posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Item "None" of "DateTime | None" has no attribute "int_timestamp" [union-attr] posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 to "ensure_pendulum_datetime" has incompatible type "str | None"; expected "DateTime | Date | datetime | date | str | float | int" [arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Not all union combinations were tried because there are too many unions [misc] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 2 to "source" has incompatible type "str | None"; expected "str" [arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 3 to "source" has incompatible type "str | None"; expected "str" [arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 4 to "source" has incompatible type "int | None"; expected "int" [arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 6 to "source" has incompatible type "Schema | None"; expected "Schema" [arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 7 to "source" has incompatible type "Literal['evolve', 'discard_value', 'freeze', 'discard_row'] | TSchemaContractDict | None"; expected "Literal['evolve', 'discard_value', 'freeze', 'discard_row'] | TSchemaContractDict" [arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 8 to "source" has incompatible type "type[BaseConfiguration] | None"; expected "type[BaseConfiguration]" [arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 1 to "build_resource_dependency_graph" has incompatible type "EndpointResourceBase | None"; expected "EndpointResourceBase" 
[arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Incompatible types in assignment (expression has type "list[str] | None", variable has type "list[str]") [assignment] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 1 to "setup_incremental_object" has incompatible type "dict[str, ResolveParamConfig | IncrementalParamConfig | Any] | None"; expected "dict[str, Any]" [arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument "base_url" to "RESTClient" has incompatible type "str | None"; expected "str" [arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument 1 to "exclude_keys" has incompatible type "dict[str, ResolveParamConfig | IncrementalParamConfig | Any] | None"; expected "Mapping[str, Any]" [arg-type] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Incompatible default for argument "resolved_param" (default has type "ResolvedParam | None", argument has type "ResolvedParam") [assignment] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Unused "type: ignore" comment [unused-ignore] +posthog/temporal/data_imports/pipelines/rest_source/__init__.py:0: error: Argument "module" to "SourceInfo" has incompatible type Module | None; expected Module [arg-type] posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a return type annotation [no-untyped-def] posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation [no-untyped-def] posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] @@ -721,6 +720,13 @@ posthog/temporal/tests/batch_exports/test_run_updates.py:0: error: Unused "type: posthog/temporal/tests/batch_exports/test_run_updates.py:0: error: Unused "type: ignore" comment [unused-ignore] posthog/temporal/tests/batch_exports/test_run_updates.py:0: error: Unused "type: ignore" comment [unused-ignore] posthog/temporal/tests/batch_exports/test_batch_exports.py:0: error: TypedDict key must be a string literal; expected one of ("_timestamp", "created_at", "distinct_id", "elements", "elements_chain", ...) 
[literal-required] +posthog/temporal/data_imports/pipelines/stripe/__init__.py:0: error: Unused "type: ignore" comment [unused-ignore] +posthog/temporal/data_imports/pipelines/stripe/__init__.py:0: error: Unused "type: ignore" comment [unused-ignore] +posthog/temporal/data_imports/pipelines/stripe/__init__.py:0: error: Unused "type: ignore" comment [unused-ignore] +posthog/temporal/data_imports/pipelines/stripe/__init__.py:0: error: Unused "type: ignore" comment [unused-ignore] +posthog/temporal/data_imports/pipelines/stripe/__init__.py:0: error: Unused "type: ignore" comment [unused-ignore] +posthog/temporal/data_imports/pipelines/stripe/__init__.py:0: error: Unused "type: ignore" comment [unused-ignore] +posthog/temporal/data_imports/pipelines/stripe/__init__.py:0: error: Unused "type: ignore" comment [unused-ignore] posthog/session_recordings/session_recording_api.py:0: error: Argument "team_id" to "get_realtime_snapshots" has incompatible type "int"; expected "str" [arg-type] posthog/queries/app_metrics/test/test_app_metrics.py:0: error: Argument 3 to "AppMetricsErrorDetailsQuery" has incompatible type "AppMetricsRequestSerializer"; expected "AppMetricsErrorsRequestSerializer" [arg-type] posthog/queries/app_metrics/test/test_app_metrics.py:0: error: Argument 3 to "AppMetricsErrorDetailsQuery" has incompatible type "AppMetricsRequestSerializer"; expected "AppMetricsErrorsRequestSerializer" [arg-type] diff --git a/plugin-server/src/cdp/cdp-processed-events-consumer.ts b/plugin-server/src/cdp/cdp-processed-events-consumer.ts new file mode 100644 index 00000000000000..b98e07ed796383 --- /dev/null +++ b/plugin-server/src/cdp/cdp-processed-events-consumer.ts @@ -0,0 +1,278 @@ +import { features, librdkafkaVersion, Message } from 'node-rdkafka' +import { Histogram } from 'prom-client' + +import { KAFKA_EVENTS_JSON, KAFKA_LOG_ENTRIES } from '../config/kafka-topics' +import { BatchConsumer, startBatchConsumer } from '../kafka/batch-consumer' +import { createRdConnectionConfigFromEnvVars, createRdProducerConfigFromEnvVars } from '../kafka/config' +import { createKafkaProducer } from '../kafka/producer' +import { addSentryBreadcrumbsEventListeners } from '../main/ingestion-queues/kafka-metrics' +import { runInstrumentedFunction } from '../main/utils' +import { GroupTypeToColumnIndex, Hub, PluginsServerConfig, RawClickHouseEvent, TeamId } from '../types' +import { KafkaProducerWrapper } from '../utils/db/kafka-producer-wrapper' +import { PostgresRouter } from '../utils/db/postgres' +import { status } from '../utils/status' +import { AppMetrics } from '../worker/ingestion/app-metrics' +import { GroupTypeManager } from '../worker/ingestion/group-type-manager' +import { OrganizationManager } from '../worker/ingestion/organization-manager' +import { TeamManager } from '../worker/ingestion/team-manager' +import { RustyHook } from '../worker/rusty-hook' +import { HogExecutor } from './hog-executor' +import { HogFunctionManager } from './hog-function-manager' +import { HogFunctionInvocationGlobals, HogFunctionInvocationResult, HogFunctionLogEntry } from './types' +import { convertToHogFunctionInvocationGlobals } from './utils' + +// Must require as `tsc` strips unused `import` statements and just requiring this seems to init some globals +require('@sentry/tracing') + +// WARNING: Do not change this - it will essentially reset the consumer +const KAFKA_CONSUMER_GROUP_ID = 'cdp-function-executor' +const BUCKETS_KB_WRITTEN = [0, 128, 512, 1024, 5120, 10240, 20480, 51200, 102400, 204800, Infinity] + +const 
histogramKafkaBatchSize = new Histogram({ + name: 'cdp_function_executor_batch_size', + help: 'The size of the batches we are receiving from Kafka', + buckets: [0, 50, 100, 250, 500, 750, 1000, 1500, 2000, 3000, Infinity], +}) + +const histogramKafkaBatchSizeKb = new Histogram({ + name: 'cdp_function_executor_batch_size_kb', + help: 'The size in kb of the batches we are receiving from Kafka', + buckets: BUCKETS_KB_WRITTEN, +}) + +export interface TeamIDWithConfig { + teamId: TeamId | null + consoleLogIngestionEnabled: boolean +} + +export class CdpProcessedEventsConsumer { + batchConsumer?: BatchConsumer + teamManager: TeamManager + organizationManager: OrganizationManager + groupTypeManager: GroupTypeManager + hogFunctionManager: HogFunctionManager + hogExecutor: HogExecutor + appMetrics?: AppMetrics + topic: string + consumerGroupId: string + isStopping = false + + private kafkaProducer?: KafkaProducerWrapper + + private promises: Set> = new Set() + + constructor(private config: PluginsServerConfig, private hub?: Hub) { + this.topic = KAFKA_EVENTS_JSON + this.consumerGroupId = KAFKA_CONSUMER_GROUP_ID + + const postgres = hub?.postgres ?? new PostgresRouter(config) + + this.teamManager = new TeamManager(postgres, config) + this.organizationManager = new OrganizationManager(postgres, this.teamManager) + this.groupTypeManager = new GroupTypeManager(postgres, this.teamManager) + this.hogFunctionManager = new HogFunctionManager(postgres, config) + const rustyHook = this.hub?.rustyHook ?? new RustyHook(this.config) + this.hogExecutor = new HogExecutor(this.config, this.hogFunctionManager, rustyHook) + } + + private scheduleWork(promise: Promise): Promise { + this.promises.add(promise) + void promise.finally(() => this.promises.delete(promise)) + return promise + } + + public async consume(event: HogFunctionInvocationGlobals): Promise { + return await this.hogExecutor!.executeMatchingFunctions(event) + } + + public async handleEachBatch(messages: Message[], heartbeat: () => void): Promise { + status.info('🔁', `cdp-function-executor - handling batch`, { + size: messages.length, + }) + await runInstrumentedFunction({ + statsKey: `cdpFunctionExecutor.handleEachBatch`, + sendTimeoutGuardToSentry: false, + func: async () => { + histogramKafkaBatchSize.observe(messages.length) + histogramKafkaBatchSizeKb.observe(messages.reduce((acc, m) => (m.value?.length ?? 0) + acc, 0) / 1024) + + const events: HogFunctionInvocationGlobals[] = [] + + await runInstrumentedFunction({ + statsKey: `cdpFunctionExecutor.handleEachBatch.parseKafkaMessages`, + func: async () => { + // TODO: Early exit for events without associated hooks + + await Promise.all( + messages.map(async (message) => { + try { + const clickHouseEvent = JSON.parse(message.value!.toString()) as RawClickHouseEvent + + if (!this.hogFunctionManager.teamHasHogFunctions(clickHouseEvent.team_id)) { + // No need to continue if the team doesn't have any functions + return + } + + let groupTypes: GroupTypeToColumnIndex | undefined = undefined + + if ( + await this.organizationManager.hasAvailableFeature( + clickHouseEvent.team_id, + 'group_analytics' + ) + ) { + // If the organization has group analytics enabled then we enrich the event with group data + groupTypes = await this.groupTypeManager.fetchGroupTypes( + clickHouseEvent.team_id + ) + } + + const team = await this.teamManager.fetchTeam(clickHouseEvent.team_id) + if (!team) { + return + } + events.push( + convertToHogFunctionInvocationGlobals( + clickHouseEvent, + team, + this.config.SITE_URL ?? 
'http://localhost:8000', + groupTypes + ) + ) + } catch (e) { + status.error('Error parsing message', e) + } + }) + ) + }, + }) + heartbeat() + + const invocationResults: HogFunctionInvocationResult[] = [] + + if (!events.length) { + return + } + + await runInstrumentedFunction({ + statsKey: `cdpFunctionExecutor.handleEachBatch.consumeBatch`, + func: async () => { + const results = await Promise.all(events.map((e) => this.consume(e))) + invocationResults.push(...results.flat()) + }, + }) + + heartbeat() + + // TODO: Follow up - process metrics from the invocationResults + await runInstrumentedFunction({ + statsKey: `cdpFunctionExecutor.handleEachBatch.queueMetrics`, + func: async () => { + const allLogs = invocationResults.reduce((acc, result) => { + return [...acc, ...result.logs] + }, [] as HogFunctionLogEntry[]) + + await Promise.all( + allLogs.map((x) => + this.kafkaProducer!.produce({ + topic: KAFKA_LOG_ENTRIES, + value: Buffer.from(JSON.stringify(x)), + key: x.instance_id, + waitForAck: true, + }) + ) + ) + + if (allLogs.length) { + status.info('🔁', `cdp-function-executor - produced logs`, { + size: allLogs.length, + }) + } + }, + }) + }, + }) + } + + public async start(): Promise { + status.info('🔁', 'cdp-function-executor - starting', { + librdKafkaVersion: librdkafkaVersion, + kafkaCapabilities: features, + }) + + // NOTE: This is the only place where we need to use the shared server config + const globalConnectionConfig = createRdConnectionConfigFromEnvVars(this.config) + const globalProducerConfig = createRdProducerConfigFromEnvVars(this.config) + + await this.hogFunctionManager.start() + + this.kafkaProducer = new KafkaProducerWrapper( + await createKafkaProducer(globalConnectionConfig, globalProducerConfig) + ) + + this.appMetrics = + this.hub?.appMetrics ?? + new AppMetrics( + this.kafkaProducer, + this.config.APP_METRICS_FLUSH_FREQUENCY_MS, + this.config.APP_METRICS_FLUSH_MAX_QUEUE_SIZE + ) + this.kafkaProducer.producer.connect() + + this.batchConsumer = await startBatchConsumer({ + connectionConfig: createRdConnectionConfigFromEnvVars(this.config), + groupId: this.consumerGroupId, + topic: this.topic, + autoCommit: true, + sessionTimeout: this.config.KAFKA_CONSUMPTION_SESSION_TIMEOUT_MS, + maxPollIntervalMs: this.config.KAFKA_CONSUMPTION_MAX_POLL_INTERVAL_MS, + // the largest size of a message that can be fetched by the consumer. 
+ // the largest size our MSK cluster allows is 20MB + // we only use 9 or 10MB but there's no reason to limit this 🤷️ + consumerMaxBytes: this.config.KAFKA_CONSUMPTION_MAX_BYTES, + consumerMaxBytesPerPartition: this.config.KAFKA_CONSUMPTION_MAX_BYTES_PER_PARTITION, + // our messages are very big, so we don't want to buffer too many + // queuedMinMessages: this.config.KAFKA_QUEUE_SIZE, + consumerMaxWaitMs: this.config.KAFKA_CONSUMPTION_MAX_WAIT_MS, + consumerErrorBackoffMs: this.config.KAFKA_CONSUMPTION_ERROR_BACKOFF_MS, + fetchBatchSize: this.config.INGESTION_BATCH_SIZE, + batchingTimeoutMs: this.config.KAFKA_CONSUMPTION_BATCHING_TIMEOUT_MS, + topicCreationTimeoutMs: this.config.KAFKA_TOPIC_CREATION_TIMEOUT_MS, + eachBatch: async (messages, { heartbeat }) => { + return await this.scheduleWork(this.handleEachBatch(messages, heartbeat)) + }, + callEachBatchWhenEmpty: false, + }) + + addSentryBreadcrumbsEventListeners(this.batchConsumer.consumer) + + this.batchConsumer.consumer.on('disconnected', async (err) => { + // since we can't be guaranteed that the consumer will be stopped before some other code calls disconnect + // we need to listen to disconnect and make sure we're stopped + status.info('🔁', 'cdp-function-executor batch consumer disconnected, cleaning up', { err }) + await this.stop() + }) + } + + public async stop(): Promise[]> { + status.info('🔁', 'cdp-function-executor - stopping') + this.isStopping = true + + // Mark as stopping so that we don't actually process any more incoming messages, but still keep the process alive + await this.batchConsumer?.stop() + + const promiseResults = await Promise.allSettled(this.promises) + + await this.kafkaProducer?.disconnect() + await this.hogFunctionManager.stop() + + status.info('👍', 'cdp-function-executor - stopped!') + + return promiseResults + } + + public isHealthy() { + // TODO: Maybe extend this to check if we are shutting down so we don't get killed early. 
+ return this.batchConsumer?.isHealthy() + } +} diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts new file mode 100644 index 00000000000000..29f7265e099076 --- /dev/null +++ b/plugin-server/src/cdp/hog-executor.ts @@ -0,0 +1,325 @@ +import { convertHogToJS, convertJSToHog, exec, ExecResult, VMState } from '@posthog/hogvm' +import { Webhook } from '@posthog/plugin-scaffold' +import { DateTime } from 'luxon' + +import { PluginsServerConfig, TimestampFormat } from '../types' +import { trackedFetch } from '../utils/fetch' +import { status } from '../utils/status' +import { castTimestampOrNow, UUIDT } from '../utils/utils' +import { RustyHook } from '../worker/rusty-hook' +import { HogFunctionManager } from './hog-function-manager' +import { + HogFunctionInvocation, + HogFunctionInvocationAsyncResponse, + HogFunctionInvocationGlobals, + HogFunctionInvocationResult, + HogFunctionLogEntry, + HogFunctionLogEntryLevel, + HogFunctionType, +} from './types' +import { convertToHogFunctionFilterGlobal } from './utils' + +export const formatInput = (bytecode: any, globals: HogFunctionInvocation['globals']): any => { + // Similar to how we generate the bytecode by iterating over the values, + // here we iterate over the object and replace the bytecode with the actual values + // bytecode is indicated as an array beginning with ["_h"] + + if (Array.isArray(bytecode) && bytecode[0] === '_h') { + const res = exec(bytecode, { + globals, + timeout: 100, + maxAsyncSteps: 0, + }) + + if (!res.finished) { + // NOT ALLOWED + throw new Error('Input fields must be simple sync values') + } + return convertHogToJS(res.result) + } + + if (Array.isArray(bytecode)) { + return bytecode.map((item) => formatInput(item, globals)) + } else if (typeof bytecode === 'object') { + return Object.fromEntries(Object.entries(bytecode).map(([key, value]) => [key, formatInput(value, globals)])) + } else { + return bytecode + } +} + +export class HogExecutor { + constructor( + private serverConfig: PluginsServerConfig, + private hogFunctionManager: HogFunctionManager, + private rustyHook: RustyHook + ) {} + + /** + * Intended to be invoked as a starting point from an event + */ + async executeMatchingFunctions(event: HogFunctionInvocationGlobals): Promise { + const allFunctionsForTeam = this.hogFunctionManager.getTeamHogFunctions(event.project.id) + + const filtersGlobals = convertToHogFunctionFilterGlobal(event) + + // Filter all functions based on the invocation + const functions = Object.fromEntries( + Object.entries(allFunctionsForTeam).filter(([_key, value]) => { + try { + const filters = value.filters + + if (!filters?.bytecode) { + // NOTE: If we don't have bytecode this indicates something went wrong. 
+ // The model will always save a bytecode if it was compiled correctly + return false + } + + const filterResult = exec(filters.bytecode, { + globals: filtersGlobals, + timeout: 100, + maxAsyncSteps: 0, + }) + + if (typeof filterResult.result !== 'boolean') { + // NOTE: If the result is not a boolean we should not execute the function + return false + } + + return filterResult.result + } catch (error) { + status.error('🦔', `[HogExecutor] Error filtering function`, { + hogFunctionId: value.id, + hogFunctionName: value.name, + error: error.message, + }) + } + + return false + }) + ) + + if (!Object.keys(functions).length) { + return [] + } + + status.info( + '🦔', + `[HogExecutor] Found ${Object.keys(functions).length} matching functions out of ${ + Object.keys(allFunctionsForTeam).length + } for team` + ) + + const results: HogFunctionInvocationResult[] = [] + + for (const hogFunction of Object.values(functions)) { + // Add the source of the trigger to the globals + const modifiedGlobals: HogFunctionInvocationGlobals = { + ...event, + source: { + name: hogFunction.name ?? `Hog function: ${hogFunction.id}`, + url: `${event.project.url}/pipeline/destinations/hog-${hogFunction.id}/configuration/`, + }, + } + + const result = await this.execute(hogFunction, { + id: new UUIDT().toString(), + globals: modifiedGlobals, + }) + + results.push(result) + } + + return results + } + + /** + * Intended to be invoked as a continuation from an async function + */ + async executeAsyncResponse(invocation: HogFunctionInvocationAsyncResponse): Promise { + if (!invocation.hogFunctionId) { + throw new Error('No hog function id provided') + } + + const hogFunction = this.hogFunctionManager.getTeamHogFunctions(invocation.globals.project.id)[ + invocation.hogFunctionId + ] + + invocation.vmState.stack.push(convertJSToHog(invocation.response)) + + await this.execute(hogFunction, invocation, invocation.vmState) + } + + async execute( + hogFunction: HogFunctionType, + invocation: HogFunctionInvocation, + state?: VMState + ): Promise { + const loggingContext = { + hogFunctionId: hogFunction.id, + hogFunctionName: hogFunction.name, + hogFunctionUrl: invocation.globals.source?.url, + } + + status.info('🦔', `[HogExecutor] Executing function`, loggingContext) + + let error: any = null + const logs: HogFunctionLogEntry[] = [] + let lastTimestamp = DateTime.now() + + const log = (level: HogFunctionLogEntryLevel, message: string) => { + // TRICKY: The log entries table is de-duped by timestamp, so we need to ensure that the timestamps are unique + // It is unclear how this affects parallel execution environments + let now = DateTime.now() + if (now <= lastTimestamp) { + // Ensure that the timestamps are unique + now = lastTimestamp.plus(1) + } + lastTimestamp = now + + logs.push({ + team_id: hogFunction.team_id, + log_source: 'hog_function', + log_source_id: hogFunction.id, + instance_id: invocation.id, + timestamp: castTimestampOrNow(now, TimestampFormat.ClickHouse), + level, + message, + }) + } + + if (!state) { + log('debug', `Executing function`) + } else { + log('debug', `Resuming function`) + } + + try { + const globals = this.buildHogFunctionGlobals(hogFunction, invocation) + + const res = exec(state ?? 
hogFunction.bytecode, { + globals, + timeout: 100, // NOTE: This will likely be configurable in the future + maxAsyncSteps: 5, // NOTE: This will likely be configurable in the future + asyncFunctions: { + // We need to pass these in but they don't actually do anything as it is a sync exec + fetch: async () => Promise.resolve(), + }, + functions: { + print: (...args) => { + const message = args + .map((arg) => (typeof arg !== 'string' ? JSON.stringify(arg) : arg)) + .join(', ') + log('info', message) + }, + }, + }) + + if (!res.finished) { + log('debug', `Suspending function due to async function call '${res.asyncFunctionName}'`) + status.info('🦔', `[HogExecutor] Function returned not finished. Executing async function`, { + ...loggingContext, + asyncFunctionName: res.asyncFunctionName, + }) + switch (res.asyncFunctionName) { + case 'fetch': + await this.asyncFunctionFetch(hogFunction, invocation, res) + break + default: + status.error( + '🦔', + `[HogExecutor] Unknown async function: ${res.asyncFunctionName}`, + loggingContext + ) + // TODO: Log error somewhere + } + } else { + log('debug', `Function completed (${hogFunction.id}) (${hogFunction.name})!`) + } + } catch (err) { + error = err + status.error('🦔', `[HogExecutor] Error executing function ${hogFunction.id} - ${hogFunction.name}`, error) + } + + return { + ...invocation, + success: !error, + error, + logs, + } + } + + buildHogFunctionGlobals(hogFunction: HogFunctionType, invocation: HogFunctionInvocation): Record { + const builtInputs: Record = {} + + Object.entries(hogFunction.inputs).forEach(([key, item]) => { + // TODO: Replace this with iterator + builtInputs[key] = item.value + + if (item.bytecode) { + // Use the bytecode to compile the field + builtInputs[key] = formatInput(item.bytecode, invocation.globals) + } + }) + + return { + ...invocation.globals, + inputs: builtInputs, + } + } + + private async asyncFunctionFetch( + hogFunction: HogFunctionType, + invocation: HogFunctionInvocation, + execResult: ExecResult + ): Promise { + // TODO: validate the args + const args = (execResult.asyncFunctionArgs ?? []).map((arg) => convertHogToJS(arg)) + const url: string = args[0] + const options = args[1] + + const method = options.method || 'POST' + const headers = options.headers || { + 'Content-Type': 'application/json', + } + const body = options.body || {} + + const webhook: Webhook = { + url, + method: method, + headers: headers, + body: typeof body === 'string' ? 
body : JSON.stringify(body, undefined, 4), + } + + // NOTE: Purposefully disabled for now - once we have callback support we can re-enable + // const SPECIAL_CONFIG_ID = -3 // Hardcoded to mean Hog + // const success = await this.rustyHook.enqueueIfEnabledForTeam({ + // webhook: webhook, + // teamId: hogFunction.team_id, + // pluginId: SPECIAL_CONFIG_ID, + // pluginConfigId: SPECIAL_CONFIG_ID, + // }) + + const success = false + + // TODO: Temporary test code + if (!success) { + status.info('🦔', `[HogExecutor] Webhook not sent via rustyhook, sending directly instead`) + const fetchResponse = await trackedFetch(url, { + method: webhook.method, + body: webhook.body, + headers: webhook.headers, + timeout: this.serverConfig.EXTERNAL_REQUEST_TIMEOUT_MS, + }) + + await this.executeAsyncResponse({ + ...invocation, + hogFunctionId: hogFunction.id, + vmState: execResult.state!, + response: { + status: fetchResponse.status, + body: await fetchResponse.text(), + }, + }) + } + } +} diff --git a/plugin-server/src/cdp/hog-function-manager.ts b/plugin-server/src/cdp/hog-function-manager.ts new file mode 100644 index 00000000000000..52f349b1fcbdb3 --- /dev/null +++ b/plugin-server/src/cdp/hog-function-manager.ts @@ -0,0 +1,132 @@ +import * as schedule from 'node-schedule' + +import { PluginsServerConfig, Team } from '../types' +import { PostgresRouter, PostgresUse } from '../utils/db/postgres' +import { PubSub } from '../utils/pubsub' +import { status } from '../utils/status' +import { HogFunctionType } from './types' + +export type HogFunctionMap = Record +export type HogFunctionCache = Record + +export class HogFunctionManager { + private started: boolean + private ready: boolean + private cache: HogFunctionCache + private pubSub: PubSub + private refreshJob?: schedule.Job + + constructor(private postgres: PostgresRouter, private serverConfig: PluginsServerConfig) { + this.started = false + this.ready = false + this.cache = {} + + this.pubSub = new PubSub(this.serverConfig, { + 'reload-hog-function': async (message) => { + const { hogFunctionId, teamId } = JSON.parse(message) + await this.reloadHogFunction(teamId, hogFunctionId) + }, + }) + } + + public async start(): Promise { + // TRICKY - when running with individual capabilities, this won't run twice but locally or as a complete service it will... + if (this.started) { + return + } + this.started = true + await this.pubSub.start() + await this.reloadAllHogFunctions() + + // every 5 minutes all HogFunctionManager caches are reloaded for eventual consistency + this.refreshJob = schedule.scheduleJob('*/5 * * * *', async () => { + await this.reloadAllHogFunctions().catch((error) => { + status.error('🍿', 'Error reloading hog functions:', error) + }) + }) + this.ready = true + } + + public async stop(): Promise { + if (this.refreshJob) { + schedule.cancelJob(this.refreshJob) + } + + await this.pubSub.stop() + } + + public getTeamHogFunctions(teamId: Team['id']): HogFunctionMap { + if (!this.ready) { + throw new Error('HogFunctionManager is not ready! 
Run HogFunctionManager.start() before this') + } + return this.cache[teamId] || {} + } + + public teamHasHogFunctions(teamId: Team['id']): boolean { + return !!Object.keys(this.getTeamHogFunctions(teamId)).length + } + + public async reloadAllHogFunctions(): Promise { + this.cache = await fetchAllHogFunctionsGroupedByTeam(this.postgres) + status.info('🍿', 'Fetched all hog functions from DB anew') + } + + public async reloadHogFunction(teamId: Team['id'], id: HogFunctionType['id']): Promise { + status.info('🍿', `Reloading hog function ${id} from DB`) + const item = await fetchHogFunction(this.postgres, id) + if (item) { + this.cache[teamId][id] = item + } else { + delete this.cache[teamId][id] + } + } +} + +const HOG_FUNCTION_FIELDS = ['id', 'team_id', 'name', 'enabled', 'inputs', 'filters', 'bytecode'] + +export async function fetchAllHogFunctionsGroupedByTeam(client: PostgresRouter): Promise { + const items = ( + await client.query( + PostgresUse.COMMON_READ, + ` + SELECT ${HOG_FUNCTION_FIELDS.join(', ')} + FROM posthog_hogfunction + WHERE deleted = FALSE AND enabled = TRUE + `, + [], + 'fetchAllHogFunctions' + ) + ).rows + + const cache: HogFunctionCache = {} + for (const item of items) { + if (!cache[item.team_id]) { + cache[item.team_id] = {} + } + + cache[item.team_id][item.id] = item + } + + return cache +} + +export async function fetchHogFunction( + client: PostgresRouter, + id: HogFunctionType['id'] +): Promise { + const items: HogFunctionType[] = ( + await client.query( + PostgresUse.COMMON_READ, + `SELECT ${HOG_FUNCTION_FIELDS.join(', ')} + FROM posthog_hogfunction + WHERE id = $1 AND deleted = FALSE AND enabled = TRUE`, + [id], + 'fetchHogFunction' + ) + ).rows + if (!items.length) { + return null + } + + return items[0] +} diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts new file mode 100644 index 00000000000000..65fce9d837d612 --- /dev/null +++ b/plugin-server/src/cdp/types.ts @@ -0,0 +1,161 @@ +import { VMState } from '@posthog/hogvm' + +import { ElementPropertyFilter, EventPropertyFilter, PersonPropertyFilter } from '../types' + +export type HogBytecode = any[] + +// subset of EntityFilter +export interface HogFunctionFilterBase { + id: string + name: string | null + order: number + properties: (EventPropertyFilter | PersonPropertyFilter | ElementPropertyFilter)[] +} + +export interface HogFunctionFilterEvent extends HogFunctionFilterBase { + type: 'events' + bytecode: HogBytecode +} + +export interface HogFunctionFilterAction extends HogFunctionFilterBase { + type: 'actions' + // Loaded at run time from Action model + bytecode?: HogBytecode +} + +export type HogFunctionFilter = HogFunctionFilterEvent | HogFunctionFilterAction + +export interface HogFunctionFilters { + events?: HogFunctionFilterEvent[] + actions?: HogFunctionFilterAction[] + filter_test_accounts?: boolean + // Loaded at run time from Team model + filter_test_accounts_bytecode?: boolean + bytecode?: HogBytecode +} + +export type HogFunctionInvocationGlobals = { + project: { + id: number + name: string + url: string + } + source?: { + name: string + url: string + } + event: { + uuid: string + name: string + distinct_id: string + properties: Record + timestamp: string + url: string + } + person?: { + uuid: string + properties: Record + url: string + } + groups?: Record< + string, + { + id: string // the "key" of the group + type: string + index: number + url: string + properties: Record + } + > +} + +export type HogFunctionFilterGlobals = { + // Filter Hog is built in the same way 
as analytics so the global object is meant to be an event + event: string + timestamp: string + elements_chain: string + properties: Record + + person?: { + properties: Record + } + + group_0?: { + properties: Record + } + group_1?: { + properties: Record + } + group_2?: { + properties: Record + } + group_3?: { + properties: Record + } + group_4?: { + properties: Record + } +} + +export type HogFunctionLogEntrySource = 'system' | 'hog' | 'console' +export type HogFunctionLogEntryLevel = 'debug' | 'info' | 'warn' | 'error' + +export interface HogFunctionLogEntry { + team_id: number + log_source: string // The kind of source (hog_function) + log_source_id: string // The id of the hog function + instance_id: string // The id of the specific invocation + timestamp: string + level: HogFunctionLogEntryLevel + message: string +} + +export type HogFunctionInvocation = { + id: string + globals: HogFunctionInvocationGlobals +} + +export type HogFunctionInvocationResult = HogFunctionInvocation & { + success: boolean + error?: any + logs: HogFunctionLogEntry[] +} + +export type HogFunctionInvocationAsyncRequest = HogFunctionInvocation & { + hogFunctionId: HogFunctionType['id'] + vmState: VMState +} + +export type HogFunctionInvocationAsyncResponse = HogFunctionInvocationAsyncRequest & { + response: any +} + +// Mostly copied from frontend types +export type HogFunctionInputSchemaType = { + type: 'string' | 'number' | 'boolean' | 'dictionary' | 'choice' | 'json' + key: string + label?: string + choices?: { value: string; label: string }[] + required?: boolean + default?: any + secret?: boolean + description?: string +} + +export type HogFunctionType = { + id: string + team_id: number + name: string + enabled: boolean + hog: string + bytecode: HogBytecode + inputs_schema: HogFunctionInputSchemaType[] + inputs: Record< + string, + { + value: any + bytecode?: HogBytecode | object + } + > + filters?: HogFunctionFilters | null +} diff --git a/plugin-server/src/cdp/utils.ts b/plugin-server/src/cdp/utils.ts new file mode 100644 index 00000000000000..82f2739944dc83 --- /dev/null +++ b/plugin-server/src/cdp/utils.ts @@ -0,0 +1,93 @@ +// NOTE: PostIngestionEvent is our context event - it should never be sent directly to an output, but rather transformed into a lightweight schema + +import { GroupTypeToColumnIndex, RawClickHouseEvent, Team } from '../types' +import { clickHouseTimestampToISO } from '../utils/utils' +import { HogFunctionFilterGlobals, HogFunctionInvocationGlobals } from './types' + +// that we can keep to as a contract +export function convertToHogFunctionInvocationGlobals( + event: RawClickHouseEvent, + team: Team, + siteUrl: string, + groupTypes?: GroupTypeToColumnIndex +): HogFunctionInvocationGlobals { + const projectUrl = `${siteUrl}/project/${team.id}` + + const properties = event.properties ? 
JSON.parse(event.properties) : {} + if (event.elements_chain) { + properties['$elements_chain'] = event.elements_chain + } + + let groups: HogFunctionInvocationGlobals['groups'] = undefined + + if (groupTypes) { + groups = {} + + for (const [groupType, columnIndex] of Object.entries(groupTypes)) { + const groupKey = (properties[`$groups`] || {})[groupType] + const groupProperties = event[`group${columnIndex}_properties`] + + // TODO: Check that groupProperties always exist if the event is in that group + if (groupKey && groupProperties) { + const properties = JSON.parse(groupProperties) + + groups[groupType] = { + id: groupKey, + index: columnIndex, + type: groupType, + url: `${projectUrl}/groups/${columnIndex}/${encodeURIComponent(groupKey)}`, + properties, + } + } + } + } + const context: HogFunctionInvocationGlobals = { + project: { + id: team.id, + name: team.name, + url: projectUrl, + }, + event: { + // TODO: Element chain! + uuid: event.uuid, + name: event.event!, + distinct_id: event.distinct_id, + properties, + timestamp: clickHouseTimestampToISO(event.timestamp), + // TODO: generate url + url: `${projectUrl}/events/${encodeURIComponent(event.uuid)}/${encodeURIComponent( + clickHouseTimestampToISO(event.timestamp) + )}`, + }, + person: event.person_id + ? { + uuid: event.person_id, + properties: event.person_properties ? JSON.parse(event.person_properties) : {}, + // TODO: IS this distinct_id or person_id? + url: `${projectUrl}/person/${encodeURIComponent(event.distinct_id)}`, + } + : undefined, + groups, + } + + return context +} + +export function convertToHogFunctionFilterGlobal(globals: HogFunctionInvocationGlobals): HogFunctionFilterGlobals { + const groups: Record = {} + + for (const [_groupType, group] of Object.entries(globals.groups || {})) { + groups[`group_${group.index}`] = { + properties: group.properties, + } + } + + return { + event: globals.event.name, + elements_chain: globals.event.properties['$elements_chain'], + timestamp: globals.event.timestamp, + properties: globals.event.properties, + person: globals.person ? 
{ properties: globals.person.properties } : undefined, + ...groups, + } +} diff --git a/plugin-server/src/utils/db/db.ts b/plugin-server/src/utils/db/db.ts index a7cd6d0b23dd9f..de57b602725cce 100644 --- a/plugin-server/src/utils/db/db.ts +++ b/plugin-server/src/utils/db/db.ts @@ -717,6 +717,12 @@ export class DB { update: Partial, tx?: TransactionClient ): Promise<[InternalPerson, ProducerRecord[]]> { + let versionString = 'COALESCE(version, 0)::numeric + 1' + if (update.version) { + versionString = update.version.toString() + delete update['version'] + } + const updateValues = Object.values(unparsePersonPartial(update)) // short circuit if there are no updates to be made @@ -727,11 +733,9 @@ export class DB { const values = [...updateValues, person.id].map(sanitizeJsonbValue) // Potentially overriding values badly if there was an update to the person after computing updateValues above - const queryString = `UPDATE posthog_person SET version = COALESCE(version, 0)::numeric + 1, ${Object.keys( - update - ).map((field, index) => `"${sanitizeSqlIdentifier(field)}" = $${index + 1}`)} WHERE id = $${ - Object.values(update).length + 1 - } + const queryString = `UPDATE posthog_person SET version = ${versionString}, ${Object.keys(update).map( + (field, index) => `"${sanitizeSqlIdentifier(field)}" = $${index + 1}` + )} WHERE id = $${Object.values(update).length + 1} RETURNING *` const { rows } = await this.postgres.query( diff --git a/plugin-server/src/utils/event.ts b/plugin-server/src/utils/event.ts index e49f8c0b1519e6..59ccbc6707d2eb 100644 --- a/plugin-server/src/utils/event.ts +++ b/plugin-server/src/utils/event.ts @@ -10,6 +10,7 @@ import { PostIngestionEvent, RawClickHouseEvent, } from '../types' +import { status } from '../utils/status' import { chainToElements } from './db/elements-chain' import { personInitialAndUTMProperties, sanitizeString } from './db/utils' import { @@ -249,13 +250,19 @@ export function formPipelineEvent(message: Message): PipelineEvent { // Track $set usage in events that aren't known to use it, before ingestion adds anything there if ( combinedEvent.properties && - !(combinedEvent.event in PERSON_EVENTS) && - !(combinedEvent.event in KNOWN_SET_EVENTS) && + !PERSON_EVENTS.has(combinedEvent.event) && + !KNOWN_SET_EVENTS.has(combinedEvent.event) && ('$set' in combinedEvent.properties || '$set_once' in combinedEvent.properties || '$unset' in combinedEvent.properties) ) { setUsageInNonPersonEventsCounter.inc() + if (Math.random() < 0.001) { + status.info('👀', 'Found $set usage in non-person event', { + event: combinedEvent.event, + team_id: combinedEvent.team_id, + }) + } } const event: PipelineEvent = normalizeEvent({ diff --git a/plugin-server/src/worker/ingestion/person-state.ts b/plugin-server/src/worker/ingestion/person-state.ts index d3bf32e21310b5..24e279fd981dad 100644 --- a/plugin-server/src/worker/ingestion/person-state.ts +++ b/plugin-server/src/worker/ingestion/person-state.ts @@ -545,6 +545,23 @@ export class PersonState { created_at: createdAt, properties: properties, is_identified: true, + + // By using the max version between the two Persons, we ensure that if + // this Person is later split, we can use `this_person.version + 1` for + // any split-off Persons and know that *that* version will be higher than + // any previously deleted Person, and so the new Person row will "win" and + // "undelete" the Person. 
+ // + // For example: + // - Merge Person_1(version:7) into Person_2(version:2) + // - Person_1 is deleted + // - Person_2 attains version 8 via this code below + // - Person_2 is later split, which attempts to re-create Person_1 by using + // its `distinct_id` to generate the deterministic Person UUID. + // That new Person_1 will have a version _at least_ as high as 8, and + // so any previously existing rows in CH or otherwise from + // Person_1(version:7) will "lose" to this new Person_1. + version: Math.max(mergeInto.version, otherPerson.version) + 1, }, tx ) diff --git a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts new file mode 100644 index 00000000000000..dd93f1521b0c92 --- /dev/null +++ b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts @@ -0,0 +1,220 @@ +import { CdpProcessedEventsConsumer } from '../../src/cdp/cdp-processed-events-consumer' +import { HogFunctionType } from '../../src/cdp/types' +import { defaultConfig } from '../../src/config/config' +import { Hub, PluginsServerConfig, Team } from '../../src/types' +import { createHub } from '../../src/utils/db/hub' +import { getFirstTeam, resetTestDatabase } from '../helpers/sql' +import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' +import { createIncomingEvent, createMessage, insertHogFunction as _insertHogFunction } from './fixtures' + +const config: PluginsServerConfig = { + ...defaultConfig, +} + +const mockConsumer = { + on: jest.fn(), + commitSync: jest.fn(), + commit: jest.fn(), + queryWatermarkOffsets: jest.fn(), + committed: jest.fn(), + assignments: jest.fn(), + isConnected: jest.fn(() => true), + getMetadata: jest.fn(), +} + +jest.mock('../../src/kafka/batch-consumer', () => { + return { + startBatchConsumer: jest.fn(() => + Promise.resolve({ + join: () => ({ + finally: jest.fn(), + }), + stop: jest.fn(), + consumer: mockConsumer, + }) + ), + } +}) + +jest.mock('../../src/utils/fetch', () => { + return { + trackedFetch: jest.fn(() => Promise.resolve({ status: 200, text: () => Promise.resolve({}) })), + } +}) + +jest.mock('../../src/utils/db/kafka-producer-wrapper', () => { + const mockKafkaProducer = { + producer: { + connect: jest.fn(), + }, + disconnect: jest.fn(), + produce: jest.fn(), + } + return { + KafkaProducerWrapper: jest.fn(() => mockKafkaProducer), + } +}) + +const mockFetch: jest.Mock = require('../../src/utils/fetch').trackedFetch + +const mockProducer = require('../../src/utils/db/kafka-producer-wrapper').KafkaProducerWrapper() + +jest.setTimeout(1000) + +const noop = () => {} + +const decodeKafkaMessage = (message: any): any => { + return { + ...message, + value: JSON.parse(message.value.toString()), + } +} + +describe('CDP Processed Events Consuner', () => { + let processor: CdpProcessedEventsConsumer + let hub: Hub + let closeHub: () => Promise + let team: Team + + const insertHogFunction = async (hogFunction: Partial) => { + const item = await _insertHogFunction(hub.postgres, team, hogFunction) + // Trigger the reload that django would do + await processor.hogFunctionManager.reloadAllHogFunctions() + return item + } + + beforeEach(async () => { + await resetTestDatabase() + ;[hub, closeHub] = await createHub() + team = await getFirstTeam(hub) + + processor = new CdpProcessedEventsConsumer(config, hub) + await processor.start() + + mockFetch.mockClear() + }) + + afterEach(async () => { + jest.setTimeout(10000) + await processor.stop() + await closeHub() + }) + + afterAll(() 
=> { + jest.useRealTimers() + }) + + describe('general event processing', () => { + /** + * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios + */ + it('can parse incoming messages correctly', async () => { + await insertHogFunction({ + ...HOG_EXAMPLES.simple_fetch, + ...HOG_INPUTS_EXAMPLES.simple_fetch, + ...HOG_FILTERS_EXAMPLES.no_filters, + }) + // Create a message that should be processed by this function + // Run the function and check that it was executed + await processor.handleEachBatch( + [ + createMessage( + createIncomingEvent(team.id, { + uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', + event: '$pageview', + properties: JSON.stringify({ + $lib_version: '1.0.0', + }), + }) + ), + ], + noop + ) + + expect(mockFetch).toHaveBeenCalledTimes(1) + expect(mockFetch.mock.calls[0]).toMatchInlineSnapshot(` + Array [ + "https://example.com/posthog-webhook", + Object { + "body": "{ + \\"event\\": { + \\"uuid\\": \\"b3a1fe86-b10c-43cc-acaf-d208977608d0\\", + \\"name\\": \\"$pageview\\", + \\"distinct_id\\": \\"distinct_id_1\\", + \\"properties\\": { + \\"$lib_version\\": \\"1.0.0\\", + \\"$elements_chain\\": \\"[]\\" + }, + \\"timestamp\\": null, + \\"url\\": \\"http://localhost:8000/project/2/events/b3a1fe86-b10c-43cc-acaf-d208977608d0/null\\" + }, + \\"groups\\": null, + \\"nested\\": { + \\"foo\\": \\"http://localhost:8000/project/2/events/b3a1fe86-b10c-43cc-acaf-d208977608d0/null\\" + }, + \\"person\\": null, + \\"event_url\\": \\"http://localhost:8000/project/2/events/b3a1fe86-b10c-43cc-acaf-d208977608d0/null-test\\" + }", + "headers": Object { + "version": "v=1.0.0", + }, + "method": "POST", + "timeout": 10000, + }, + ] + `) + }) + + it('generates logs and produces them to kafka', async () => { + await insertHogFunction({ + ...HOG_EXAMPLES.simple_fetch, + ...HOG_INPUTS_EXAMPLES.simple_fetch, + ...HOG_FILTERS_EXAMPLES.no_filters, + }) + + // Create a message that should be processed by this function + // Run the function and check that it was executed + await processor.handleEachBatch( + [ + createMessage( + createIncomingEvent(team.id, { + uuid: 'b3a1fe86-b10c-43cc-acaf-d208977608d0', + event: '$pageview', + properties: JSON.stringify({ + $lib_version: '1.0.0', + }), + }) + ), + ], + noop + ) + + expect(mockFetch).toHaveBeenCalledTimes(1) + expect(mockProducer.produce).toHaveBeenCalledTimes(2) + + expect(decodeKafkaMessage(mockProducer.produce.mock.calls[0][0])).toMatchObject({ + key: expect.any(String), + topic: 'log_entries_test', + value: { + instance_id: expect.any(String), + level: 'debug', + log_source: 'hog_function', + log_source_id: expect.any(String), + message: 'Executing function', + team_id: 2, + timestamp: expect.any(String), + }, + waitForAck: true, + }) + + expect(decodeKafkaMessage(mockProducer.produce.mock.calls[1][0])).toMatchObject({ + topic: 'log_entries_test', + value: { + log_source: 'hog_function', + message: "Suspending function due to async function call 'fetch'", + team_id: 2, + }, + }) + }) + }) +}) diff --git a/plugin-server/tests/cdp/examples.ts b/plugin-server/tests/cdp/examples.ts new file mode 100644 index 00000000000000..9215d84c8026b5 --- /dev/null +++ b/plugin-server/tests/cdp/examples.ts @@ -0,0 +1,161 @@ +import { HogFunctionType } from '../../src/cdp/types' + +/** + * Hog functions are largely generated and built in the django service, making it tricky to test on this side. + * As such we have a bunch of prebuilt examples here for usage in tests. 
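+ * For example, `simple_fetch` below pairs the raw hog source with the bytecode the django side would normally compile, so executor tests can run it directly without that compilation step.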
+ */ +export const HOG_EXAMPLES: Record> = { + simple_fetch: { + hog: "fetch(inputs.url, {\n 'headers': inputs.headers,\n 'body': inputs.payload,\n 'method': inputs.method,\n 'payload': inputs.payload\n});", + bytecode: [ + '_h', + 32, + 'headers', + 32, + 'headers', + 32, + 'inputs', + 1, + 2, + 32, + 'body', + 32, + 'payload', + 32, + 'inputs', + 1, + 2, + 32, + 'method', + 32, + 'method', + 32, + 'inputs', + 1, + 2, + 32, + 'payload', + 32, + 'payload', + 32, + 'inputs', + 1, + 2, + 42, + 4, + 32, + 'url', + 32, + 'inputs', + 1, + 2, + 2, + 'fetch', + 2, + 35, + ], + }, +} + +export const HOG_INPUTS_EXAMPLES: Record> = { + simple_fetch: { + inputs_schema: [ + { key: 'url', type: 'string', label: 'Webhook URL', secret: false, required: true }, + { key: 'payload', type: 'json', label: 'JSON Payload', secret: false, required: true }, + { + key: 'method', + type: 'choice', + label: 'HTTP Method', + secret: false, + choices: [ + { label: 'POST', value: 'POST' }, + { label: 'PUT', value: 'PUT' }, + { label: 'PATCH', value: 'PATCH' }, + { label: 'GET', value: 'GET' }, + ], + required: true, + }, + { key: 'headers', type: 'dictionary', label: 'Headers', secret: false, required: false }, + ], + inputs: { + url: { + value: 'https://example.com/posthog-webhook', + bytecode: ['_h', 32, 'https://example.com/posthog-webhook'], + }, + method: { value: 'POST' }, + headers: { + value: { version: 'v={event.properties.$lib_version}' }, + bytecode: { + version: ['_h', 32, '$lib_version', 32, 'properties', 32, 'event', 1, 3, 32, 'v=', 2, 'concat', 2], + }, + }, + payload: { + value: { + event: '{event}', + groups: '{groups}', + nested: { foo: '{event.url}' }, + person: '{person}', + event_url: "{f'{event.url}-test'}", + }, + bytecode: { + event: ['_h', 32, 'event', 1, 1], + groups: ['_h', 32, 'groups', 1, 1], + nested: { foo: ['_h', 32, 'url', 32, 'event', 1, 2] }, + person: ['_h', 32, 'person', 1, 1], + event_url: ['_h', 32, '-test', 32, 'url', 32, 'event', 1, 2, 2, 'concat', 2], + }, + }, + }, + }, +} + +export const HOG_FILTERS_EXAMPLES: Record> = { + no_filters: { filters: { events: [], actions: [], bytecode: ['_h', 29] } }, + pageview_or_autocapture_filter: { + filters: { + events: [ + { + id: '$pageview', + name: '$pageview', + type: 'events', + order: 0, + properties: [{ key: '$current_url', type: 'event', value: 'posthog', operator: 'icontains' }], + }, + { id: '$autocapture', name: '$autocapture', type: 'events', order: 1 }, + ], + actions: [], + bytecode: [ + '_h', + 32, + '$autocapture', + 32, + 'event', + 1, + 1, + 11, + 3, + 1, + 32, + '%posthog%', + 32, + '$current_url', + 32, + 'properties', + 1, + 2, + 18, + 32, + '$pageview', + 32, + 'event', + 1, + 1, + 11, + 3, + 2, + 4, + 2, + ], + }, + }, +} diff --git a/plugin-server/tests/cdp/fixtures.ts b/plugin-server/tests/cdp/fixtures.ts new file mode 100644 index 00000000000000..8e6d836756cb52 --- /dev/null +++ b/plugin-server/tests/cdp/fixtures.ts @@ -0,0 +1,93 @@ +import { randomUUID } from 'crypto' +import { Message } from 'node-rdkafka' + +import { HogFunctionInvocationGlobals, HogFunctionType } from '../../src/cdp/types' +import { ClickHouseTimestamp, RawClickHouseEvent, Team } from '../../src/types' +import { PostgresRouter } from '../../src/utils/db/postgres' +import { insertRow } from '../helpers/sql' + +export const createHogFunction = (hogFunction: Partial) => { + const item: HogFunctionType = { + id: randomUUID(), + team_id: 1, + created_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + created_by_id: 1001, + 
enabled: true, + deleted: false, + description: '', + hog: '', + ...hogFunction, + } + + return item +} + +export const createIncomingEvent = (teamId: number, data: Partial): RawClickHouseEvent => { + return { + team_id: teamId, + created_at: new Date().toISOString() as ClickHouseTimestamp, + elements_chain: '[]', + person_created_at: new Date().toISOString() as ClickHouseTimestamp, + person_properties: '{}', + distinct_id: 'distinct_id_1', + uuid: randomUUID(), + event: '$pageview', + timestamp: new Date().toISOString() as ClickHouseTimestamp, + properties: '{}', + ...data, + } +} + +export const createMessage = (event: RawClickHouseEvent, overrides: Partial = {}): Message => { + return { + partition: 1, + topic: 'test', + offset: 0, + timestamp: overrides.timestamp ?? Date.now(), + size: 1, + ...overrides, + value: Buffer.from(JSON.stringify(event)), + } +} + +export const insertHogFunction = async ( + postgres: PostgresRouter, + team: Team, + hogFunction: Partial = {} +) => { + const res = await insertRow( + postgres, + 'posthog_hogfunction', + createHogFunction({ + team_id: team.id, + ...hogFunction, + }) + ) + return res +} + +export const createHogExecutionGlobals = ( + data: Partial = {} +): HogFunctionInvocationGlobals => { + return { + ...data, + project: { + id: 1, + name: 'test', + url: 'http://localhost:8000/projects/1', + ...(data.project ?? {}), + }, + event: { + uuid: 'uuid', + name: 'test', + distinct_id: 'distinct_id', + url: 'http://localhost:8000/events/1', + properties: { + $lib_version: '1.2.3', + }, + timestamp: new Date().toISOString(), + ...(data.event ?? {}), + }, + } +} diff --git a/plugin-server/tests/cdp/hog-executor.test.ts b/plugin-server/tests/cdp/hog-executor.test.ts new file mode 100644 index 00000000000000..7f989ca01fdbd9 --- /dev/null +++ b/plugin-server/tests/cdp/hog-executor.test.ts @@ -0,0 +1,123 @@ +import { HogExecutor } from '../../src/cdp/hog-executor' +import { HogFunctionManager } from '../../src/cdp/hog-function-manager' +import { defaultConfig } from '../../src/config/config' +import { PluginsServerConfig } from '../../src/types' +import { RustyHook } from '../../src/worker/rusty-hook' +import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' +import { createHogExecutionGlobals, createHogFunction, insertHogFunction as _insertHogFunction } from './fixtures' + +const config: PluginsServerConfig = { + ...defaultConfig, +} + +jest.mock('../../src/utils/fetch', () => { + return { + trackedFetch: jest.fn(() => Promise.resolve({ status: 200, text: () => Promise.resolve({}) })), + } +}) + +const mockFetch = require('../../src/utils/fetch').trackedFetch + +describe('Hog Executor', () => { + jest.setTimeout(1000) + let executor: HogExecutor + + const mockFunctionManager = { + reloadAllHogFunctions: jest.fn(), + getTeamHogFunctions: jest.fn(), + } + + const mockRustyHook = { + enqueueIfEnabledForTeam: jest.fn(() => true), + } + + beforeEach(() => { + jest.useFakeTimers() + jest.setSystemTime(new Date('2024-06-07T12:00:00.000Z').getTime()) + executor = new HogExecutor( + config, + mockFunctionManager as any as HogFunctionManager, + mockRustyHook as any as RustyHook + ) + }) + + describe('general event processing', () => { + /** + * Tests here are somewhat expensive so should mostly simulate happy paths and the more e2e scenarios + */ + it('can parse incoming messages correctly', async () => { + const fn = createHogFunction({ + ...HOG_EXAMPLES.simple_fetch, + ...HOG_INPUTS_EXAMPLES.simple_fetch, + 
...HOG_FILTERS_EXAMPLES.no_filters, + }) + + mockFunctionManager.getTeamHogFunctions.mockReturnValue({ + [1]: fn, + }) + + // Create a message that should be processed by this function + // Run the function and check that it was executed + await executor.executeMatchingFunctions(createHogExecutionGlobals()) + + expect(mockFetch).toHaveBeenCalledTimes(1) + expect(mockFetch.mock.calls[0]).toMatchInlineSnapshot(` + Array [ + "https://example.com/posthog-webhook", + Object { + "body": "{ + \\"event\\": { + \\"uuid\\": \\"uuid\\", + \\"name\\": \\"test\\", + \\"distinct_id\\": \\"distinct_id\\", + \\"url\\": \\"http://localhost:8000/events/1\\", + \\"properties\\": { + \\"$lib_version\\": \\"1.2.3\\" + }, + \\"timestamp\\": \\"2024-06-07T12:00:00.000Z\\" + }, + \\"groups\\": null, + \\"nested\\": { + \\"foo\\": \\"http://localhost:8000/events/1\\" + }, + \\"person\\": null, + \\"event_url\\": \\"http://localhost:8000/events/1-test\\" + }", + "headers": Object { + "version": "v=1.2.3", + }, + "method": "POST", + "timeout": 10000, + }, + ] + `) + }) + // NOTE: Will be fixed in follow up + it('can filters incoming messages correctly', async () => { + const fn = createHogFunction({ + ...HOG_EXAMPLES.simple_fetch, + ...HOG_INPUTS_EXAMPLES.simple_fetch, + ...HOG_FILTERS_EXAMPLES.pageview_or_autocapture_filter, + }) + + mockFunctionManager.getTeamHogFunctions.mockReturnValue({ + [1]: fn, + }) + + const resultsShouldntMatch = await executor.executeMatchingFunctions(createHogExecutionGlobals()) + expect(resultsShouldntMatch).toHaveLength(0) + + const resultsShouldMatch = await executor.executeMatchingFunctions( + createHogExecutionGlobals({ + event: { + name: '$pageview', + properties: { + $current_url: 'https://posthog.com', + }, + } as any, + }) + ) + expect(resultsShouldMatch).toHaveLength(1) + }) + }) +}) diff --git a/plugin-server/tests/worker/ingestion/person-state.test.ts b/plugin-server/tests/worker/ingestion/person-state.test.ts index ab921d71902cc1..88b1f1dfabfc82 100644 --- a/plugin-server/tests/worker/ingestion/person-state.test.ts +++ b/plugin-server/tests/worker/ingestion/person-state.test.ts @@ -2207,7 +2207,10 @@ describe('PersonState.update()', () => { // then pros can be dropped, see https://docs.google.com/presentation/d/1Osz7r8bKkDD5yFzw0cCtsGVf1LTEifXS-dzuwaS8JGY // properties: { first: true, second: true, third: true }, created_at: timestamp, - version: 1, // the test intends for it to be a chain, so must get v1, we get v2 if second->first and third->first, but we want it to be third->second->first + // This is 2 because they all start with version 0, and then: x + // third -> second = max(third(0), second(0)) + 1 == version 1 + // second -> first = max(second(1), first(0)) + 1 == version 2 + version: 2, is_identified: true, }) ) @@ -2296,7 +2299,10 @@ describe('PersonState.update()', () => { uuid: firstUserUuid, // guaranteed to be merged into this based on timestamps properties: { first: true, second: true, third: true }, created_at: timestamp, - version: 1, // the test intends for it to be a chain, so must get v1, we get v2 if second->first and third->first, but we want it to be third->second->first + // This is 2 because they all start with version 0, and then: + // third -> second = max(third(0), second(0)) + 1 == version 1 + // second -> first = max(second(1), first(0)) + 1 == version 2 + version: 2, is_identified: true, }) ) diff --git a/posthog/api/hog_function.py b/posthog/api/hog_function.py new file mode 100644 index 00000000000000..77cb4c88e8a7b1 --- /dev/null +++ 
b/posthog/api/hog_function.py @@ -0,0 +1,182 @@ +import structlog +from django_filters.rest_framework import DjangoFilterBackend +from rest_framework import serializers, viewsets +from rest_framework.serializers import BaseSerializer + +from posthog.api.forbid_destroy_model import ForbidDestroyModel +from posthog.api.log_entries import LogEntryMixin +from posthog.api.routing import TeamAndOrgViewSetMixin +from posthog.api.shared import UserBasicSerializer +from posthog.hogql.bytecode import create_bytecode +from posthog.hogql.parser import parse_program +from posthog.models.hog_functions.hog_function import HogFunction +from posthog.models.hog_functions.utils import generate_template_bytecode +from posthog.permissions import PostHogFeatureFlagPermission + + +logger = structlog.get_logger(__name__) + + +class InputsSchemaItemSerializer(serializers.Serializer): + type = serializers.ChoiceField(choices=["string", "boolean", "dictionary", "choice", "json"]) + key = serializers.CharField() + label = serializers.CharField(required=False) # type: ignore + choices = serializers.ListField(child=serializers.DictField(), required=False) + required = serializers.BooleanField(default=False) # type: ignore + default = serializers.JSONField(required=False) + secret = serializers.BooleanField(default=False) + description = serializers.CharField(required=False) + + # TODO Validate choices if type=choice + + +class AnyInputField(serializers.Field): + def to_internal_value(self, data): + return data + + def to_representation(self, value): + return value + + +class InputsItemSerializer(serializers.Serializer): + value = AnyInputField(required=False) + bytecode = serializers.ListField(required=False, read_only=True) + + def validate(self, attrs): + schema = self.context["schema"] + value = attrs.get("value") + + if schema.get("required") and not value: + raise serializers.ValidationError("This field is required.") + + if not value: + return attrs + + name: str = schema["key"] + item_type = schema["type"] + value = attrs["value"] + + # Validate each type + if item_type == "string": + if not isinstance(value, str): + raise serializers.ValidationError("Value must be a string.") + elif item_type == "boolean": + if not isinstance(value, bool): + raise serializers.ValidationError("Value must be a boolean.") + elif item_type == "dictionary": + if not isinstance(value, dict): + raise serializers.ValidationError("Value must be a dictionary.") + + try: + if value: + if item_type in ["string", "dictionary", "json"]: + attrs["bytecode"] = generate_template_bytecode(value) + except Exception as e: + raise serializers.ValidationError({"inputs": {name: f"Invalid template: {str(e)}"}}) + + return attrs + + +class HogFunctionMinimalSerializer(serializers.ModelSerializer): + created_by = UserBasicSerializer(read_only=True) + + class Meta: + model = HogFunction + fields = [ + "id", + "name", + "description", + "created_at", + "created_by", + "updated_at", + "enabled", + "hog", + "filters", + ] + read_only_fields = fields + + +class HogFunctionSerializer(HogFunctionMinimalSerializer): + class Meta: + model = HogFunction + fields = [ + "id", + "name", + "description", + "created_at", + "created_by", + "updated_at", + "enabled", + "hog", + "bytecode", + "inputs_schema", + "inputs", + "filters", + ] + read_only_fields = [ + "id", + "created_at", + "created_by", + "updated_at", + "bytecode", + ] + + def validate_inputs_schema(self, value): + if not isinstance(value, list): + raise serializers.ValidationError("inputs_schema must be a 
list of objects.") + + serializer = InputsSchemaItemSerializer(data=value, many=True) + + if not serializer.is_valid(): + raise serializers.ValidationError(serializer.errors) + + return serializer.validated_data or [] + + def validate(self, attrs): + team = self.context["get_team"]() + attrs["team"] = team + attrs["inputs_schema"] = attrs.get("inputs_schema", []) + attrs["inputs"] = attrs.get("inputs", {}) + attrs["filters"] = attrs.get("filters", {}) + + validated_inputs = {} + + for schema in attrs["inputs_schema"]: + value = attrs["inputs"].get(schema["key"], {}) + serializer = InputsItemSerializer(data=value, context={"schema": schema}) + + if not serializer.is_valid(): + first_error = next(iter(serializer.errors.values()))[0] + raise serializers.ValidationError({"inputs": {schema["key"]: first_error}}) + + validated_inputs[schema["key"]] = serializer.validated_data + + attrs["inputs"] = validated_inputs + + # Attempt to compile the hog + try: + program = parse_program(attrs["hog"]) + attrs["bytecode"] = create_bytecode(program, supported_functions={"fetch"}) + except Exception as e: + raise serializers.ValidationError({"hog": str(e)}) + + return attrs + + def create(self, validated_data: dict, *args, **kwargs) -> HogFunction: + request = self.context["request"] + validated_data["created_by"] = request.user + return super().create(validated_data=validated_data) + + +class HogFunctionViewSet(TeamAndOrgViewSetMixin, LogEntryMixin, ForbidDestroyModel, viewsets.ModelViewSet): + scope_object = "INTERNAL" # Keep internal until we are happy to release this GA + queryset = HogFunction.objects.all() + filter_backends = [DjangoFilterBackend] + filterset_fields = ["id", "team", "created_by", "enabled"] + + permission_classes = [PostHogFeatureFlagPermission] + posthog_feature_flag = {"hog-functions": ["create", "partial_update", "update"]} + log_source = "hog_function" + + def get_serializer_class(self) -> type[BaseSerializer]: + return HogFunctionMinimalSerializer if self.action == "list" else HogFunctionSerializer diff --git a/posthog/api/log_entries.py b/posthog/api/log_entries.py new file mode 100644 index 00000000000000..fda13747bf266f --- /dev/null +++ b/posthog/api/log_entries.py @@ -0,0 +1,121 @@ +import dataclasses +from datetime import datetime +from typing import Any, Optional, cast +from rest_framework import serializers, viewsets +from rest_framework.request import Request +from rest_framework.response import Response +from rest_framework.decorators import action +from rest_framework.exceptions import ValidationError +from rest_framework_dataclasses.serializers import DataclassSerializer + +from posthog.clickhouse.client.execute import sync_execute + + +@dataclasses.dataclass(frozen=True) +class LogEntry: + log_source_id: str + instance_id: str + timestamp: datetime + level: str + message: str + + +class LogEntrySerializer(DataclassSerializer): + class Meta: + dataclass = LogEntry + + +class LogEntryRequestSerializer(serializers.Serializer): + limit = serializers.IntegerField(required=False, default=50, max_value=500, min_value=1) + after = serializers.DateTimeField(required=False) + before = serializers.DateTimeField(required=False) + level = serializers.ListField(child=serializers.CharField(), required=False) + search = serializers.CharField(required=False) + instance_id = serializers.CharField(required=False) + + +def fetch_log_entries( + team_id: int, + log_source: str, + log_source_id: str, + limit: int, + instance_id: Optional[str] = None, + after: Optional[datetime] = None, + 
before: Optional[datetime] = None, + search: Optional[str] = None, + level: Optional[list[str]] = None, +) -> list[Any]: + """Fetch a list of batch export log entries from ClickHouse.""" + if level is None: + level = [] + clickhouse_where_parts: list[str] = [] + clickhouse_kwargs: dict[str, Any] = {} + + clickhouse_where_parts.append("log_source = %(log_source)s") + clickhouse_kwargs["log_source"] = log_source + clickhouse_where_parts.append("log_source_id = %(log_source_id)s") + clickhouse_kwargs["log_source_id"] = log_source_id + clickhouse_where_parts.append("team_id = %(team_id)s") + clickhouse_kwargs["team_id"] = team_id + + if instance_id: + clickhouse_where_parts.append("instance_id = %(instance_id)s") + clickhouse_kwargs["instance_id"] = instance_id + if after: + clickhouse_where_parts.append("timestamp > toDateTime64(%(after)s, 6)") + clickhouse_kwargs["after"] = after.isoformat().replace("+00:00", "") + if before: + clickhouse_where_parts.append("timestamp < toDateTime64(%(before)s, 6)") + clickhouse_kwargs["before"] = before.isoformat().replace("+00:00", "") + if search: + clickhouse_where_parts.append("message ILIKE %(search)s") + clickhouse_kwargs["search"] = f"%{search}%" + if len(level) > 0: + clickhouse_where_parts.append("upper(level) in %(levels)s") + clickhouse_kwargs["levels"] = level + + clickhouse_query = f""" + SELECT log_source_id, instance_id, timestamp, upper(level) as level, message FROM log_entries + WHERE {' AND '.join(clickhouse_where_parts)} ORDER BY timestamp DESC {f'LIMIT {limit}'} + """ + + return [LogEntry(*result) for result in cast(list, sync_execute(clickhouse_query, clickhouse_kwargs))] + + +class LogEntryMixin(viewsets.GenericViewSet): + log_source: str # Should be set by the inheriting class + + @action(detail=True, methods=["GET"]) + def logs(self, request: Request, *args, **kwargs): + obj = self.get_object() + + param_serializer = LogEntryRequestSerializer(data=request.query_params) + + if not self.log_source: + raise ValidationError("log_source not set on the viewset") + + if not param_serializer.is_valid(): + raise ValidationError(param_serializer.errors) + + params = param_serializer.validated_data + + data = fetch_log_entries( + team_id=self.team_id, # type: ignore + log_source=self.log_source, + log_source_id=str(obj.id), + limit=params["limit"], + # From request params + instance_id=params.get("instance_id"), + after=params.get("after"), + before=params.get("before"), + search=params.get("search"), + level=params.get("level"), + ) + + page = self.paginate_queryset(data) + if page is not None: + serializer = LogEntrySerializer(page, many=True) + return self.get_paginated_response(serializer.data) + + serializer = LogEntrySerializer(data, many=True) + return Response({"status": "not implemented"}) diff --git a/posthog/api/test/test_hog_function.py b/posthog/api/test/test_hog_function.py new file mode 100644 index 00000000000000..63b6fbc22ec93d --- /dev/null +++ b/posthog/api/test/test_hog_function.py @@ -0,0 +1,287 @@ +import json +from unittest.mock import ANY, patch + +from rest_framework import status + +from posthog.models.action.action import Action +from posthog.test.base import APIBaseTest, ClickhouseTestMixin, QueryMatchingTest + + +EXAMPLE_FULL = { + "name": "HogHook", + "hog": "fetch(inputs.url, {\n 'headers': inputs.headers,\n 'body': inputs.payload,\n 'method': inputs.method\n});", + "inputs_schema": [ + {"key": "url", "type": "string", "label": "Webhook URL", "required": True}, + {"key": "payload", "type": "json", "label": "JSON 
Payload", "required": True}, + { + "key": "method", + "type": "choice", + "label": "HTTP Method", + "choices": [ + {"label": "POST", "value": "POST"}, + {"label": "PUT", "value": "PUT"}, + {"label": "PATCH", "value": "PATCH"}, + {"label": "GET", "value": "GET"}, + ], + "required": True, + }, + {"key": "headers", "type": "dictionary", "label": "Headers", "required": False}, + ], + "inputs": { + "url": { + "value": "http://localhost:2080/0e02d917-563f-4050-9725-aad881b69937", + }, + "method": {"value": "POST"}, + "headers": { + "value": {"version": "v={event.properties.$lib_version}"}, + }, + "payload": { + "value": { + "event": "{event}", + "groups": "{groups}", + "nested": {"foo": "{event.url}"}, + "person": "{person}", + "event_url": "{f'{event.url}-test'}", + }, + }, + }, + "filters": { + "events": [{"id": "$pageview", "name": "$pageview", "type": "events", "order": 0}], + "actions": [{"id": "9", "name": "Test Action", "type": "actions", "order": 1}], + "filter_test_accounts": True, + }, +} + + +class TestHogFunctionAPI(ClickhouseTestMixin, APIBaseTest, QueryMatchingTest): + @patch("posthog.permissions.posthoganalytics.feature_enabled") + def test_create_hog_function_forbidden_if_not_in_flag(self, mock_feature_enabled): + mock_feature_enabled.return_value = False + + response = self.client.post( + f"/api/projects/{self.team.id}/hog_functions/", + data={ + "name": "Fetch URL", + "description": "Test description", + "hog": "fetch(inputs.url);", + }, + ) + assert response.status_code == status.HTTP_403_FORBIDDEN, response.json() + + assert mock_feature_enabled.call_count == 1 + assert mock_feature_enabled.call_args[0][0] == ("hog-functions") + + @patch("posthog.permissions.posthoganalytics.feature_enabled", return_value=True) + def test_create_hog_function(self, *args): + response = self.client.post( + f"/api/projects/{self.team.id}/hog_functions/", + data={ + "name": "Fetch URL", + "description": "Test description", + "hog": "fetch(inputs.url);", + }, + ) + assert response.status_code == status.HTTP_201_CREATED, response.json() + assert response.json()["created_by"]["id"] == self.user.id + assert response.json() == { + "id": ANY, + "name": "Fetch URL", + "description": "Test description", + "created_at": ANY, + "created_by": ANY, + "updated_at": ANY, + "enabled": False, + "hog": "fetch(inputs.url);", + "bytecode": ["_h", 32, "url", 32, "inputs", 1, 2, 2, "fetch", 1, 35], + "inputs_schema": [], + "inputs": {}, + "filters": {"bytecode": ["_h", 29]}, + } + + @patch("posthog.permissions.posthoganalytics.feature_enabled", return_value=True) + def test_inputs_required(self, *args): + payload = { + "name": "Fetch URL", + "hog": "fetch(inputs.url);", + "inputs_schema": [ + {"key": "url", "type": "string", "label": "Webhook URL", "required": True}, + ], + } + # Check required + res = self.client.post(f"/api/projects/{self.team.id}/hog_functions/", data={**payload}) + assert res.status_code == status.HTTP_400_BAD_REQUEST, res.json() + assert res.json() == { + "type": "validation_error", + "code": "invalid_input", + "detail": "This field is required.", + "attr": "inputs__url", + } + + @patch("posthog.permissions.posthoganalytics.feature_enabled", return_value=True) + def test_inputs_mismatch_type(self, *args): + payload = { + "name": "Fetch URL", + "hog": "fetch(inputs.url);", + "inputs_schema": [ + {"key": "string", "type": "string"}, + {"key": "dictionary", "type": "dictionary"}, + {"key": "boolean", "type": "boolean"}, + ], + } + + bad_inputs = { + "string": 123, + "dictionary": 123, + "boolean": 
123, + } + + for key, value in bad_inputs.items(): + res = self.client.post( + f"/api/projects/{self.team.id}/hog_functions/", data={**payload, "inputs": {key: {"value": value}}} + ) + assert res.json() == { + "type": "validation_error", + "code": "invalid_input", + "detail": f"Value must be a {key}.", + "attr": f"inputs__{key}", + }, f"Did not get error for {key}, got {res.json()}" + assert res.status_code == status.HTTP_400_BAD_REQUEST, res.json() + + @patch("posthog.permissions.posthoganalytics.feature_enabled", return_value=True) + def test_generates_hog_bytecode(self, *args): + response = self.client.post( + f"/api/projects/{self.team.id}/hog_functions/", + data={ + "name": "Fetch URL", + "hog": "let i := 0;\nwhile(i < 3) {\n i := i + 1;\n fetch(inputs.url, {\n 'headers': {\n 'x-count': f'{i}'\n },\n 'body': inputs.payload,\n 'method': inputs.method\n });\n}", + }, + ) + # JSON loads for one line comparison + assert response.json()["bytecode"] == json.loads( + '["_h", 33, 0, 33, 3, 36, 0, 15, 40, 45, 33, 1, 36, 0, 6, 37, 0, 32, "headers", 32, "x-count", 36, 0, 42, 1, 32, "body", 32, "payload", 32, "inputs", 1, 2, 32, "method", 32, "method", 32, "inputs", 1, 2, 42, 3, 32, "url", 32, "inputs", 1, 2, 2, "fetch", 2, 35, 39, -52, 35]' + ), response.json() + + @patch("posthog.permissions.posthoganalytics.feature_enabled", return_value=True) + def test_generates_inputs_bytecode(self, *args): + response = self.client.post(f"/api/projects/{self.team.id}/hog_functions/", data=EXAMPLE_FULL) + assert response.status_code == status.HTTP_201_CREATED, response.json() + assert response.json()["inputs"] == { + "url": { + "value": "http://localhost:2080/0e02d917-563f-4050-9725-aad881b69937", + "bytecode": ["_h", 32, "http://localhost:2080/0e02d917-563f-4050-9725-aad881b69937"], + }, + "payload": { + "value": { + "event": "{event}", + "groups": "{groups}", + "nested": {"foo": "{event.url}"}, + "person": "{person}", + "event_url": "{f'{event.url}-test'}", + }, + "bytecode": { + "event": ["_h", 32, "event", 1, 1], + "groups": ["_h", 32, "groups", 1, 1], + "nested": {"foo": ["_h", 32, "url", 32, "event", 1, 2]}, + "person": ["_h", 32, "person", 1, 1], + "event_url": ["_h", 32, "-test", 32, "url", 32, "event", 1, 2, 2, "concat", 2], + }, + }, + "method": {"value": "POST"}, + "headers": { + "value": {"version": "v={event.properties.$lib_version}"}, + "bytecode": { + "version": ["_h", 32, "$lib_version", 32, "properties", 32, "event", 1, 3, 32, "v=", 2, "concat", 2] + }, + }, + } + + @patch("posthog.permissions.posthoganalytics.feature_enabled", return_value=True) + def test_generates_filters_bytecode(self, *args): + action = Action.objects.create( + team=self.team, + name="test action", + steps_json=[{"event": "$pageview", "url": "docs", "url_matching": "contains"}], + ) + + self.team.test_account_filters = [ + { + "key": "email", + "value": "@posthog.com", + "operator": "not_icontains", + "type": "person", + } + ] + self.team.save() + response = self.client.post( + f"/api/projects/{self.team.id}/hog_functions/", + data={ + **EXAMPLE_FULL, + "filters": { + "events": [{"id": "$pageview", "name": "$pageview", "type": "events", "order": 0}], + "actions": [{"id": f"{action.id}", "name": "Test Action", "type": "actions", "order": 1}], + "filter_test_accounts": True, + }, + }, + ) + assert response.status_code == status.HTTP_201_CREATED, response.json() + assert response.json()["filters"] == { + "events": [{"id": "$pageview", "name": "$pageview", "type": "events", "order": 0}], + "actions": [{"id": f"{action.id}", 
"name": "Test Action", "type": "actions", "order": 1}], + "filter_test_accounts": True, + "bytecode": [ + "_h", + 32, + "%docs%", + 32, + "$current_url", + 32, + "properties", + 1, + 2, + 17, + 32, + "$pageview", + 32, + "event", + 1, + 1, + 11, + 3, + 2, + 32, + "%@posthog.com%", + 32, + "email", + 32, + "properties", + 32, + "person", + 1, + 3, + 20, + 3, + 2, + 32, + "$pageview", + 32, + "event", + 1, + 1, + 11, + 32, + "%@posthog.com%", + 32, + "email", + 32, + "properties", + 32, + "person", + 1, + 3, + 20, + 3, + 2, + 4, + 2, + ], + } diff --git a/posthog/clickhouse/client/limit.py b/posthog/clickhouse/client/limit.py new file mode 100644 index 00000000000000..7af284451816d8 --- /dev/null +++ b/posthog/clickhouse/client/limit.py @@ -0,0 +1,84 @@ +import time +from functools import wraps +from typing import Optional +from collections.abc import Callable + +from celery import current_task +from prometheus_client import Counter + +from posthog import redis + +CONCURRENT_TASKS_LIMIT_EXCEEDED_COUNTER = Counter( + "posthog_celery_task_concurrency_limit_exceeded", + "Number of times a Celery task exceeded the concurrency limit", + ["task_name", "limit", "key"], +) + +# Lua script for atomic check, remove expired if limit hit, and increment with TTL +lua_script = """ +local key = KEYS[1] +local current_time = tonumber(ARGV[1]) +local task_id = ARGV[2] +local max_concurrent_tasks = tonumber(ARGV[3]) +local ttl = tonumber(ARGV[4]) +local expiration_time = current_time + ttl + +-- Check the number of current running tasks +local running_tasks_count = redis.call('ZCARD', key) +if running_tasks_count >= max_concurrent_tasks then + -- Remove expired tasks if limit is hit + redis.call('ZREMRANGEBYSCORE', key, '-inf', current_time) + running_tasks_count = redis.call('ZCARD', key) + if running_tasks_count >= max_concurrent_tasks then + return 0 + end +end + +-- Add the new task with its expiration time +redis.call('ZADD', key, expiration_time, task_id) +return 1 +""" + + +class CeleryConcurrencyLimitExceeded(Exception): + pass + + +def limit_concurrency(max_concurrent_tasks: int, key: Optional[Callable] = None, ttl: int = 60 * 15) -> Callable: + def decorator(task_func): + @wraps(task_func) + def wrapper(*args, **kwargs): + task_name = current_task.name + redis_client = redis.get_client() + running_tasks_key = f"celery_running_tasks:{task_name}" + if key: + dynamic_key = key(*args, **kwargs) + running_tasks_key = f"{running_tasks_key}:{dynamic_key}" + else: + dynamic_key = None + task_id = f"{task_name}:{current_task.request.id}" + current_time = int(time.time()) + + # Atomically check, remove expired if limit hit, and add the new task + if ( + redis_client.eval(lua_script, 1, running_tasks_key, current_time, task_id, max_concurrent_tasks, ttl) + == 0 + ): + CONCURRENT_TASKS_LIMIT_EXCEEDED_COUNTER.labels( + task_name=task_name, limit=max_concurrent_tasks, key=dynamic_key + ).inc() + + raise CeleryConcurrencyLimitExceeded( + f"Exceeded maximum concurrent tasks limit: {max_concurrent_tasks} for key: {dynamic_key}" + ) + + try: + # Execute the task + return task_func(*args, **kwargs) + finally: + # Remove the task ID from the sorted set when the task finishes + redis_client.zrem(running_tasks_key, task_id) + + return wrapper + + return decorator diff --git a/posthog/hogql/database/schema/util/where_clause_extractor.py b/posthog/hogql/database/schema/util/where_clause_extractor.py index 1e314f02d4993c..7cb413960ca809 100644 --- a/posthog/hogql/database/schema/util/where_clause_extractor.py +++ 
b/posthog/hogql/database/schema/util/where_clause_extractor.py @@ -416,6 +416,9 @@ def visit_placeholder(self, node: ast.Placeholder) -> bool: def visit_alias(self, node: ast.Alias) -> bool: return self.visit(node.expr) + def visit_tuple(self, node: ast.Tuple) -> bool: + return all(self.visit(arg) for arg in node.exprs) + def is_simple_timestamp_field_expression(expr: ast.Expr, context: HogQLContext, tombstone_string: str) -> bool: return IsSimpleTimestampFieldExpressionVisitor(context, tombstone_string).visit(expr) @@ -515,6 +518,9 @@ def visit_alias(self, node: ast.Alias) -> bool: return self.visit(node.expr) + def visit_tuple(self, node: ast.Tuple) -> bool: + return all(self.visit(arg) for arg in node.exprs) + def rewrite_timestamp_field(expr: ast.Expr, context: HogQLContext) -> ast.Expr: return RewriteTimestampFieldVisitor(context).visit(expr) diff --git a/posthog/migrations/0425_hogfunction.py b/posthog/migrations/0425_hogfunction.py new file mode 100644 index 00000000000000..a04b78d6d4ab91 --- /dev/null +++ b/posthog/migrations/0425_hogfunction.py @@ -0,0 +1,47 @@ +# Generated by Django 4.2.11 on 2024-06-10 08:02 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion +import posthog.models.utils + + +class Migration(migrations.Migration): + dependencies = [ + ("posthog", "0424_survey_current_iteration_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="HogFunction", + fields=[ + ( + "id", + models.UUIDField( + default=posthog.models.utils.UUIDT, editable=False, primary_key=True, serialize=False + ), + ), + ("name", models.CharField(blank=True, max_length=400, null=True)), + ("description", models.TextField(blank=True, default="")), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("deleted", models.BooleanField(default=False)), + ("updated_at", models.DateTimeField(auto_now=True)), + ("enabled", models.BooleanField(default=False)), + ("hog", models.TextField()), + ("bytecode", models.JSONField(blank=True, null=True)), + ("inputs_schema", models.JSONField(null=True)), + ("inputs", models.JSONField(null=True)), + ("filters", models.JSONField(blank=True, null=True)), + ( + "created_by", + models.ForeignKey( + blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL + ), + ), + ("team", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="posthog.team")), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/posthog/migrations/0426_externaldatasource_sync_frequency.py b/posthog/migrations/0426_externaldatasource_sync_frequency.py new file mode 100644 index 00000000000000..6bb13966e591b9 --- /dev/null +++ b/posthog/migrations/0426_externaldatasource_sync_frequency.py @@ -0,0 +1,22 @@ +# Generated by Django 4.2.11 on 2024-06-06 15:43 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("posthog", "0425_hogfunction"), + ] + + operations = [ + migrations.AddField( + model_name="externaldatasource", + name="sync_frequency", + field=models.CharField( + blank=True, + choices=[("day", "Daily"), ("week", "Weekly"), ("month", "Monthly")], + default="day", + max_length=128, + ), + ), + ] diff --git a/posthog/models/hog_functions/__init__.py b/posthog/models/hog_functions/__init__.py new file mode 100644 index 00000000000000..c2af1396e40791 --- /dev/null +++ b/posthog/models/hog_functions/__init__.py @@ -0,0 +1 @@ +from .hog_function import * diff --git 
a/posthog/models/hog_functions/hog_function.py b/posthog/models/hog_functions/hog_function.py new file mode 100644 index 00000000000000..8355832e0e2afe --- /dev/null +++ b/posthog/models/hog_functions/hog_function.py @@ -0,0 +1,118 @@ +import json +from typing import Optional + +from django.db import models +from django.db.models.signals import post_save +from django.dispatch.dispatcher import receiver + +from posthog.models.action.action import Action +from posthog.models.team.team import Team +from posthog.models.utils import UUIDModel +from posthog.redis import get_client + + +class HogFunction(UUIDModel): + team: models.ForeignKey = models.ForeignKey("Team", on_delete=models.CASCADE) + name: models.CharField = models.CharField(max_length=400, null=True, blank=True) + description: models.TextField = models.TextField(blank=True, default="") + created_at: models.DateTimeField = models.DateTimeField(auto_now_add=True, blank=True) + created_by: models.ForeignKey = models.ForeignKey("User", on_delete=models.SET_NULL, null=True, blank=True) + deleted: models.BooleanField = models.BooleanField(default=False) + updated_at: models.DateTimeField = models.DateTimeField(auto_now=True) + enabled: models.BooleanField = models.BooleanField(default=False) + + hog: models.TextField = models.TextField() + bytecode: models.JSONField = models.JSONField(null=True, blank=True) + inputs_schema: models.JSONField = models.JSONField(null=True) + inputs: models.JSONField = models.JSONField(null=True) + filters: models.JSONField = models.JSONField(null=True, blank=True) + + @property + def filter_action_ids(self) -> list[int]: + if not self.filters: + return [] + try: + return [int(action["id"]) for action in self.filters.get("actions", [])] + except KeyError: + return [] + + def compile_filters_bytecode(self, actions: Optional[dict[int, Action]] = None): + from .utils import hog_function_filters_to_expr + from posthog.hogql.bytecode import create_bytecode + + self.filters = self.filters or {} + + if actions is None: + # If not provided as an optimization we fetch all actions + actions_list = ( + Action.objects.select_related("team").filter(team_id=self.team_id).filter(id__in=self.filter_action_ids) + ) + actions = {action.id: action for action in actions_list} + + try: + self.filters["bytecode"] = create_bytecode(hog_function_filters_to_expr(self.filters, self.team, actions)) + except Exception as e: + # TODO: Better reporting of this issue + self.filters["bytecode"] = None + self.filters["bytecode_error"] = str(e) + + def save(self, *args, **kwargs): + self.compile_filters_bytecode() + return super().save(*args, **kwargs) + + def __str__(self): + return self.name + + +@receiver(post_save, sender=HogFunction) +def hog_function_saved(sender, instance: HogFunction, created, **kwargs): + get_client().publish( + "reload-hog-function", + json.dumps({"teamId": instance.team_id, "hogFunctionId": str(instance.id)}), + ) + + +@receiver(post_save, sender=Action) +def action_saved(sender, instance: Action, created, **kwargs): + # Whenever an action is saved we want to load all hog functions using it + # and trigger a refresh of the filters bytecode + + affected_hog_functions = ( + HogFunction.objects.select_related("team") + .filter(team_id=instance.team_id) + .filter(filters__contains={"actions": [{"id": str(instance.id)}]}) + ) + + refresh_hog_functions(team_id=instance.team_id, affected_hog_functions=list(affected_hog_functions)) + + +@receiver(post_save, sender=Team) +def team_saved(sender, instance: Team, created, 
**kwargs): + affected_hog_functions = ( + HogFunction.objects.select_related("team") + .filter(team_id=instance.id) + .filter(filters__contains={"filter_test_accounts": True}) + ) + + refresh_hog_functions(team_id=instance.id, affected_hog_functions=list(affected_hog_functions)) + + +def refresh_hog_functions(team_id: int, affected_hog_functions: list[HogFunction]) -> int: + all_related_actions = ( + Action.objects.select_related("team") + .filter(team_id=team_id) + .filter( + id__in=[ + action_id for hog_function in affected_hog_functions for action_id in hog_function.filter_action_ids + ] + ) + ) + + actions_by_id = {action.id: action for action in all_related_actions} + + for hog_function in affected_hog_functions: + hog_function.compile_filters_bytecode(actions=actions_by_id) + + updates = HogFunction.objects.bulk_update(affected_hog_functions, ["filters"]) + + return updates diff --git a/posthog/models/hog_functions/utils.py b/posthog/models/hog_functions/utils.py new file mode 100644 index 00000000000000..5ec265487d2e32 --- /dev/null +++ b/posthog/models/hog_functions/utils.py @@ -0,0 +1,66 @@ +from typing import Any +from posthog.models.action.action import Action +from posthog.hogql.bytecode import create_bytecode +from posthog.hogql.parser import parse_expr, parse_string_template +from posthog.hogql.property import action_to_expr, property_to_expr, ast +from posthog.models.team.team import Team + + +def hog_function_filters_to_expr(filters: dict, team: Team, actions: dict[int, Action]) -> ast.Expr: + test_account_filters_exprs: list[ast.Expr] = [] + if filters.get("filter_test_accounts", False): + test_account_filters_exprs = [property_to_expr(property, team) for property in team.test_account_filters] + + all_filters = filters.get("events", []) + filters.get("actions", []) + all_filters_exprs: list[ast.Expr] = [] + + if not all_filters and test_account_filters_exprs: + # Always return test filters if set and no other filters + return ast.And(exprs=test_account_filters_exprs) + + for filter in all_filters: + exprs: list[ast.Expr] = [] + exprs.extend(test_account_filters_exprs) + + # Events + if filter.get("type") == "events" and filter.get("name"): + exprs.append(parse_expr("event = {event}", {"event": ast.Constant(value=filter["name"])})) + + # Actions + if filter.get("type") == "actions": + try: + action = actions[int(filter["id"])] + exprs.append(action_to_expr(action)) + except KeyError: + # If an action doesn't exist, we want to return no events + exprs.append(parse_expr("1 = 2")) + + # Properties + if filter.get("properties"): + exprs.append(property_to_expr(filter.get("properties"), team)) + + if len(exprs) == 0: + all_filters_exprs.append(ast.Constant(value=True)) + + all_filters_exprs.append(ast.And(exprs=exprs)) + + if all_filters_exprs: + final_expr = ast.Or(exprs=all_filters_exprs) + return final_expr + else: + return ast.Constant(value=True) + + +def generate_template_bytecode(obj: Any) -> Any: + """ + Clones an object, compiling any string values to bytecode templates + """ + + if isinstance(obj, dict): + return {key: generate_template_bytecode(value) for key, value in obj.items()} + elif isinstance(obj, list): + return [generate_template_bytecode(item) for item in obj] + elif isinstance(obj, str): + return create_bytecode(parse_string_template(obj)) + else: + return obj diff --git a/posthog/models/person/person.py b/posthog/models/person/person.py index 20f9dd7675487f..72a5bd7c799487 100644 --- a/posthog/models/person/person.py +++ b/posthog/models/person/person.py 
@@ -6,6 +6,7 @@ from posthog.models.utils import UUIDT from ..team import Team +from .missing_person import uuidFromDistinctId MAX_LIMIT_DISTINCT_IDS = 2500 @@ -51,7 +52,9 @@ def _add_distinct_ids(self, distinct_ids: list[str]) -> None: self.add_distinct_id(distinct_id) def split_person(self, main_distinct_id: Optional[str], max_splits: Optional[int] = None): - distinct_ids = Person.objects.get(pk=self.pk).distinct_ids + original_person = Person.objects.get(pk=self.pk) + distinct_ids = original_person.distinct_ids + original_person_version = original_person.version or 0 if not main_distinct_id: self.properties = {} self.save() @@ -65,7 +68,13 @@ def split_person(self, main_distinct_id: Optional[str], max_splits: Optional[int if not distinct_id == main_distinct_id: with transaction.atomic(): pdi = PersonDistinctId.objects.select_for_update().get(person=self, distinct_id=distinct_id) - person = Person.objects.create(team_id=self.team_id) + person, _ = Person.objects.get_or_create( + uuid=uuidFromDistinctId(self.team_id, distinct_id), + team_id=self.team_id, + defaults={ + "version": original_person_version + 1, + }, + ) pdi.person_id = str(person.id) pdi.version = (pdi.version or 0) + 1 pdi.save(update_fields=["version", "person_id"]) @@ -83,9 +92,7 @@ def split_person(self, main_distinct_id: Optional[str], max_splits: Optional[int version=pdi.version, ) create_person( - team_id=self.team_id, - uuid=str(person.uuid), - version=person.version or 0, + team_id=self.team_id, uuid=str(person.uuid), version=person.version, created_at=person.created_at ) objects = PersonManager() diff --git a/posthog/models/test/test_hog_function.py b/posthog/models/test/test_hog_function.py new file mode 100644 index 00000000000000..35779b7efbad2b --- /dev/null +++ b/posthog/models/test/test_hog_function.py @@ -0,0 +1,283 @@ +import json +from django.test import TestCase +from inline_snapshot import snapshot + +from posthog.models.action.action import Action +from posthog.models.hog_functions.hog_function import HogFunction +from posthog.models.user import User +from posthog.test.base import QueryMatchingTest + + +to_dict = lambda x: json.loads(json.dumps(x)) + + +class TestHogFunction(TestCase): + def setUp(self): + super().setUp() + org, team, user = User.objects.bootstrap("Test org", "ben@posthog.com", None) + self.team = team + self.user = user + self.org = org + + def test_hog_function_basic(self): + item = HogFunction.objects.create(name="Test", team=self.team) + assert item.name == "Test" + assert item.hog == "" + assert not item.enabled + + def test_hog_function_team_no_filters_compilation(self): + item = HogFunction.objects.create(name="Test", team=self.team) + + # Some json serialization is needed to compare the bytecode more easily in tests + json_filters = to_dict(item.filters) + assert json_filters["bytecode"] == ["_h", 29] # TRUE + + def test_hog_function_filters_compilation(self): + item = HogFunction.objects.create( + name="Test", + team=self.team, + filters={ + "events": [{"id": "$pageview", "name": "$pageview", "type": "events", "order": 0}], + "actions": [{"id": "9", "name": "Test Action", "type": "actions", "order": 1}], + "filter_test_accounts": True, + }, + ) + + # Some json serialization is needed to compare the bytecode more easily in tests + json_filters = to_dict(item.filters) + + assert json_filters == { + "events": [{"id": "$pageview", "name": "$pageview", "type": "events", "order": 0}], + "actions": [{"id": "9", "name": "Test Action", "type": "actions", "order": 1}], + 
"filter_test_accounts": True, + "bytecode": [ + "_h", + 33, + 2, + 33, + 1, + 11, + 29, + 32, + "^(localhost|127\\.0\\.0\\.1)($|:)", + 32, + "$host", + 32, + "properties", + 1, + 2, + 2, + "toString", + 1, + 2, + "match", + 2, + 5, + 2, + "ifNull", + 2, + 3, + 2, + 32, + "$pageview", + 32, + "event", + 1, + 1, + 11, + 29, + 32, + "^(localhost|127\\.0\\.0\\.1)($|:)", + 32, + "$host", + 32, + "properties", + 1, + 2, + 2, + "toString", + 1, + 2, + "match", + 2, + 5, + 2, + "ifNull", + 2, + 3, + 2, + 4, + 2, + ], + } + + def test_hog_function_team_filters_only_compilation(self): + item = HogFunction.objects.create( + name="Test", + team=self.team, + filters={ + "filter_test_accounts": True, + }, + ) + + # Some json serialization is needed to compare the bytecode more easily in tests + json_filters = to_dict(item.filters) + + assert json.dumps(json_filters["bytecode"]) == snapshot( + '["_h", 29, 32, "^(localhost|127\\\\.0\\\\.0\\\\.1)($|:)", 32, "$host", 32, "properties", 1, 2, 2, "toString", 1, 2, "match", 2, 5, 2, "ifNull", 2, 3, 1]' + ) + + +class TestHogFunctionsBackgroundReloading(TestCase, QueryMatchingTest): + def setUp(self): + super().setUp() + org, team, user = User.objects.bootstrap("Test org", "ben@posthog.com", None) + self.team = team + self.user = user + self.org = org + + self.action = Action.objects.create( + team=self.team, + name="Test Action", + steps_json=[ + { + "event": "test-event", + "properties": [ + { + "key": "prop-1", + "operator": "exact", + "value": "old-value-1", + "type": "event", + } + ], + } + ], + ) + + self.action2 = Action.objects.create( + team=self.team, + name="Test Action", + steps_json=[ + { + "event": None, + "properties": [ + { + "key": "prop-2", + "operator": "exact", + "value": "old-value-2", + "type": "event", + } + ], + } + ], + ) + + def test_hog_functions_reload_on_action_saved(self): + hog_function_1 = HogFunction.objects.create( + name="func 1", + team=self.team, + filters={ + "actions": [ + {"id": str(self.action.id), "name": "Test Action", "type": "actions", "order": 1}, + {"id": str(self.action2.id), "name": "Test Action 2", "type": "actions", "order": 2}, + ], + }, + ) + hog_function_2 = HogFunction.objects.create( + name="func 2", + team=self.team, + filters={ + "actions": [ + {"id": str(self.action.id), "name": "Test Action", "type": "actions", "order": 1}, + ], + }, + ) + + # Check that the bytecode is correct + assert json.dumps(hog_function_1.filters["bytecode"]) == snapshot( + '["_h", 32, "old-value-2", 32, "prop-2", 32, "properties", 1, 2, 11, 3, 1, 32, "old-value-1", 32, "prop-1", 32, "properties", 1, 2, 11, 32, "test-event", 32, "event", 1, 1, 11, 3, 2, 3, 1, 4, 2]' + ) + + assert json.dumps(hog_function_2.filters["bytecode"]) == snapshot( + '["_h", 32, "old-value-1", 32, "prop-1", 32, "properties", 1, 2, 11, 32, "test-event", 32, "event", 1, 1, 11, 3, 2, 3, 1, 4, 1]' + ) + + # Modify the action and check that the bytecode is updated + self.action.steps_json = [ + { + "event": "test-event", + "properties": [ + { + "key": "prop-1", + "operator": "exact", + "value": "change-value", + "type": "event", + } + ], + } + ] + # 1 update action, 1 load hog functions, 1 load all related actions, 1 bulk update hog functions + with self.assertNumQueries(4): + self.action.save() + hog_function_1.refresh_from_db() + hog_function_2.refresh_from_db() + + assert json.dumps(hog_function_1.filters["bytecode"]) == snapshot( + '["_h", 32, "old-value-2", 32, "prop-2", 32, "properties", 1, 2, 11, 3, 1, 32, "change-value", 32, "prop-1", 32, "properties", 
1, 2, 11, 32, "test-event", 32, "event", 1, 1, 11, 3, 2, 3, 1, 4, 2]' + ) + assert json.dumps(hog_function_2.filters["bytecode"]) == snapshot( + '["_h", 32, "change-value", 32, "prop-1", 32, "properties", 1, 2, 11, 32, "test-event", 32, "event", 1, 1, 11, 3, 2, 3, 1, 4, 1]' + ) + + def test_hog_functions_reload_on_team_saved(self): + self.team.test_account_filters = [] + self.team.save() + hog_function_1 = HogFunction.objects.create( + name="func 1", + team=self.team, + filters={ + "filter_test_accounts": True, + }, + ) + hog_function_2 = HogFunction.objects.create( + name="func 2", + team=self.team, + filters={ + "filter_test_accounts": True, + "events": [{"id": "$pageview", "name": "$pageview", "type": "events", "order": 0}], + }, + ) + hog_function_3 = HogFunction.objects.create( + name="func 3", + team=self.team, + filters={ + "filter_test_accounts": False, + }, + ) + + # Check that the bytecode is correct + assert json.dumps(hog_function_1.filters["bytecode"]) == snapshot('["_h", 29]') + assert json.dumps(hog_function_2.filters["bytecode"]) == snapshot( + '["_h", 32, "$pageview", 32, "event", 1, 1, 11, 3, 1, 4, 1]' + ) + assert json.dumps(hog_function_3.filters["bytecode"]) == snapshot('["_h", 29]') + + # Modify the action and check that the bytecode is updated + self.team.test_account_filters = [ + {"key": "$host", "operator": "regex", "value": "^(localhost|127\\.0\\.0\\.1)($|:)"}, + {"key": "$pageview", "operator": "regex", "value": "test"}, + ] + # 1 update team, 1 load hog functions, 1 update hog functions + with self.assertNumQueries(3): + self.team.save() + hog_function_1.refresh_from_db() + hog_function_2.refresh_from_db() + hog_function_3.refresh_from_db() + + assert json.dumps(hog_function_1.filters["bytecode"]) == snapshot( + '["_h", 30, 32, "test", 32, "$pageview", 32, "properties", 1, 2, 2, "toString", 1, 2, "match", 2, 2, "ifNull", 2, 30, 32, "^(localhost|127\\\\.0\\\\.0\\\\.1)($|:)", 32, "$host", 32, "properties", 1, 2, 2, "toString", 1, 2, "match", 2, 2, "ifNull", 2, 3, 2]' + ) + assert json.dumps(hog_function_2.filters["bytecode"]) == snapshot( + '["_h", 32, "$pageview", 32, "event", 1, 1, 11, 30, 32, "test", 32, "$pageview", 32, "properties", 1, 2, 2, "toString", 1, 2, "match", 2, 2, "ifNull", 2, 30, 32, "^(localhost|127\\\\.0\\\\.0\\\\.1)($|:)", 32, "$host", 32, "properties", 1, 2, 2, "toString", 1, 2, "match", 2, 2, "ifNull", 2, 3, 3, 4, 1]' + ) + assert json.dumps(hog_function_3.filters["bytecode"]) == snapshot('["_h", 29]') diff --git a/posthog/queries/breakdown_props.py b/posthog/queries/breakdown_props.py index 23f4b0d51ddc47..010d61aa8d16c8 100644 --- a/posthog/queries/breakdown_props.py +++ b/posthog/queries/breakdown_props.py @@ -36,7 +36,7 @@ HISTOGRAM_ELEMENTS_ARRAY_OF_KEY_SQL, TOP_ELEMENTS_ARRAY_OF_KEY_SQL, ) -from posthog.queries.util import PersonPropertiesMode +from posthog.queries.util import PersonPropertiesMode, alias_poe_mode_for_legacy ALL_USERS_COHORT_ID = 0 @@ -86,7 +86,9 @@ def get_breakdown_prop_values( sessions_join_params: dict = {} null_person_filter = ( - f"AND notEmpty(e.person_id)" if team.person_on_events_mode != PersonsOnEventsMode.DISABLED else "" + f"AND notEmpty(e.person_id)" + if alias_poe_mode_for_legacy(team.person_on_events_mode) != PersonsOnEventsMode.DISABLED + else "" ) if person_properties_mode == PersonPropertiesMode.DIRECT_ON_EVENTS: diff --git a/posthog/queries/event_query/event_query.py b/posthog/queries/event_query/event_query.py index d8816634d6ac12..af70ac5f2672cd 100644 --- a/posthog/queries/event_query/event_query.py 
+++ b/posthog/queries/event_query/event_query.py @@ -20,7 +20,7 @@ from posthog.queries.query_date_range import QueryDateRange from posthog.schema import PersonsOnEventsMode from posthog.session_recordings.queries.session_query import SessionQuery -from posthog.queries.util import PersonPropertiesMode +from posthog.queries.util import PersonPropertiesMode, alias_poe_mode_for_legacy from posthog.queries.person_on_events_v2_sql import PERSON_DISTINCT_ID_OVERRIDES_JOIN_SQL @@ -88,7 +88,7 @@ def __init__( self._should_join_persons = should_join_persons self._should_join_sessions = should_join_sessions self._extra_fields = extra_fields - self._person_on_events_mode = person_on_events_mode + self._person_on_events_mode = alias_poe_mode_for_legacy(person_on_events_mode) # Guards against a ClickHouse bug involving multiple joins against the same table with the same column name. # This issue manifests for us with formulas, where on queries A and B we join events against itself diff --git a/posthog/queries/funnels/base.py b/posthog/queries/funnels/base.py index 30265cace41e33..ea337267a2c575 100644 --- a/posthog/queries/funnels/base.py +++ b/posthog/queries/funnels/base.py @@ -32,7 +32,7 @@ ) from posthog.queries.funnels.funnel_event_query import FunnelEventQuery from posthog.queries.insight import insight_sync_execute -from posthog.queries.util import correct_result_for_sampling, get_person_properties_mode +from posthog.queries.util import alias_poe_mode_for_legacy, correct_result_for_sampling, get_person_properties_mode from posthog.schema import PersonsOnEventsMode from posthog.utils import relative_date_parse, generate_short_id @@ -730,7 +730,7 @@ def _get_breakdown_select_prop(self) -> tuple[str, dict[str, Any]]: self.params.update({"breakdown": self._filter.breakdown}) if self._filter.breakdown_type == "person": - if self._team.person_on_events_mode != PersonsOnEventsMode.DISABLED: + if alias_poe_mode_for_legacy(self._team.person_on_events_mode) != PersonsOnEventsMode.DISABLED: basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr( self._filter.breakdown, table="events", @@ -760,7 +760,10 @@ def _get_breakdown_select_prop(self) -> tuple[str, dict[str, Any]]: # :TRICKY: We only support string breakdown for group properties assert isinstance(self._filter.breakdown, str) - if self._team.person_on_events_mode != PersonsOnEventsMode.DISABLED and groups_on_events_querying_enabled(): + if ( + alias_poe_mode_for_legacy(self._team.person_on_events_mode) != PersonsOnEventsMode.DISABLED + and groups_on_events_querying_enabled() + ): properties_field = f"group{self._filter.breakdown_group_type_index}_properties" expression, _ = get_property_string_expr( table="events", diff --git a/posthog/queries/groups_join_query/groups_join_query.py b/posthog/queries/groups_join_query/groups_join_query.py index 128398584a352c..6d57f5eb354363 100644 --- a/posthog/queries/groups_join_query/groups_join_query.py +++ b/posthog/queries/groups_join_query/groups_join_query.py @@ -5,6 +5,7 @@ from posthog.models.filters.retention_filter import RetentionFilter from posthog.models.filters.stickiness_filter import StickinessFilter from posthog.queries.column_optimizer.column_optimizer import ColumnOptimizer +from posthog.queries.util import alias_poe_mode_for_legacy from posthog.schema import PersonsOnEventsMode @@ -29,7 +30,7 @@ def __init__( self._team_id = team_id self._column_optimizer = column_optimizer or ColumnOptimizer(self._filter, self._team_id) self._join_key = join_key - 
self._person_on_events_mode = person_on_events_mode + self._person_on_events_mode = alias_poe_mode_for_legacy(person_on_events_mode) def get_join_query(self) -> tuple[str, dict]: return "", {} diff --git a/posthog/queries/trends/breakdown.py b/posthog/queries/trends/breakdown.py index db6fd0860c38f5..e0bab69fe666d5 100644 --- a/posthog/queries/trends/breakdown.py +++ b/posthog/queries/trends/breakdown.py @@ -74,6 +74,7 @@ process_math, ) from posthog.queries.util import ( + alias_poe_mode_for_legacy, get_interval_func_ch, get_person_properties_mode, get_start_of_interval_sql, @@ -108,7 +109,7 @@ def __init__( self.params: dict[str, Any] = {"team_id": team.pk} self.column_optimizer = column_optimizer or ColumnOptimizer(self.filter, self.team_id) self.add_person_urls = add_person_urls - self.person_on_events_mode = person_on_events_mode + self.person_on_events_mode = alias_poe_mode_for_legacy(person_on_events_mode) if person_on_events_mode == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS: self._person_id_alias = f"if(notEmpty({self.PERSON_ID_OVERRIDES_TABLE_ALIAS}.distinct_id), {self.PERSON_ID_OVERRIDES_TABLE_ALIAS}.person_id, {self.EVENT_TABLE_ALIAS}.person_id)" elif person_on_events_mode == PersonsOnEventsMode.PERSON_ID_NO_OVERRIDE_PROPERTIES_ON_EVENTS: diff --git a/posthog/queries/util.py b/posthog/queries/util.py index 44dac7dd8fdb95..a113ae609517ab 100644 --- a/posthog/queries/util.py +++ b/posthog/queries/util.py @@ -40,6 +40,14 @@ class PersonPropertiesMode(Enum): """ +def alias_poe_mode_for_legacy(persons_on_events_mode: PersonsOnEventsMode) -> PersonsOnEventsMode: + if persons_on_events_mode == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_JOINED: + # PERSON_ID_OVERRIDE_PROPERTIES_JOINED is not implemented in legacy insights + # It's functionally the same as DISABLED, just slower - hence aliasing to DISABLED + return PersonsOnEventsMode.DISABLED + return persons_on_events_mode + + EARLIEST_TIMESTAMP = "2015-01-01" GET_EARLIEST_TIMESTAMP_SQL = """ @@ -178,10 +186,13 @@ def correct_result_for_sampling( def get_person_properties_mode(team: Team) -> PersonPropertiesMode: - if team.person_on_events_mode == PersonsOnEventsMode.DISABLED: + if alias_poe_mode_for_legacy(team.person_on_events_mode) == PersonsOnEventsMode.DISABLED: return PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN - if team.person_on_events_mode == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS: + if ( + alias_poe_mode_for_legacy(team.person_on_events_mode) + == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS + ): return PersonPropertiesMode.DIRECT_ON_EVENTS_WITH_POE_V2 return PersonPropertiesMode.DIRECT_ON_EVENTS diff --git a/posthog/temporal/data_imports/pipelines/helpers.py b/posthog/temporal/data_imports/pipelines/helpers.py index 318d4503ce04e8..9e38be0fd919ec 100644 --- a/posthog/temporal/data_imports/pipelines/helpers.py +++ b/posthog/temporal/data_imports/pipelines/helpers.py @@ -2,22 +2,14 @@ from django.db.models import F from posthog.warehouse.util import database_sync_to_async -CHUNK_SIZE = 10_000 - -async def check_limit( +async def is_job_cancelled( team_id: int, job_id: str, - new_count: int, -): +) -> bool: model = await aget_external_data_job(team_id, job_id) - if new_count >= CHUNK_SIZE: - new_count = 0 - - status = model.status - - return new_count, status + return model.status == ExternalDataJob.Status.CANCELLED @database_sync_to_async diff --git a/posthog/temporal/data_imports/pipelines/rest_source/__init__.py 
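A minimal sketch (illustrative, not part of this changeset) of what the new alias_poe_mode_for_legacy helper does, using only the enum members referenced in the hunks above:

from posthog.queries.util import alias_poe_mode_for_legacy
from posthog.schema import PersonsOnEventsMode

# The joined-properties mode is not implemented in legacy insights, so it is
# treated exactly like DISABLED (person properties come from the persons join).
assert (
    alias_poe_mode_for_legacy(PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_JOINED)
    == PersonsOnEventsMode.DISABLED
)
# Every other mode passes through unchanged.
assert (
    alias_poe_mode_for_legacy(PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS)
    == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS
)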
b/posthog/temporal/data_imports/pipelines/rest_source/__init__.py new file mode 100644 index 00000000000000..85ee731cf8fc60 --- /dev/null +++ b/posthog/temporal/data_imports/pipelines/rest_source/__init__.py @@ -0,0 +1,370 @@ +"""Generic API Source""" + +from typing import ( + Any, + Optional, + cast, +) +from collections.abc import AsyncGenerator, Iterator +from collections.abc import Callable +import graphlib # type: ignore[import,unused-ignore] + +import dlt +from dlt.common.validation import validate_dict +from dlt.common import jsonpath +from dlt.common.schema.schema import Schema +from dlt.common.schema.typing import TSchemaContract +from dlt.common.configuration.specs import BaseConfiguration + +from dlt.extract.incremental import Incremental +from dlt.extract.source import DltResource, DltSource + +from dlt.sources.helpers.rest_client.client import RESTClient +from dlt.sources.helpers.rest_client.paginators import BasePaginator +from dlt.sources.helpers.rest_client.typing import HTTPMethodBasic + +from posthog.temporal.data_imports.pipelines.helpers import is_job_cancelled +from .typing import ( + ClientConfig, + ResolvedParam, + Endpoint, + EndpointResource, + RESTAPIConfig, +) +from .config_setup import ( + IncrementalParam, + create_auth, + create_paginator, + build_resource_dependency_graph, + process_parent_data_item, + setup_incremental_object, + create_response_hooks, +) +from .utils import exclude_keys # noqa: F401 + + +def rest_api_source( + config: RESTAPIConfig, + team_id: int, + job_id: str, + name: Optional[str] = None, + section: Optional[str] = None, + max_table_nesting: Optional[int] = None, + root_key: bool = False, + schema: Optional[Schema] = None, + schema_contract: Optional[TSchemaContract] = None, + spec: Optional[type[BaseConfiguration]] = None, +) -> DltSource: + """Creates and configures a REST API source for data extraction. + + Args: + config (RESTAPIConfig): Configuration for the REST API source. + name (str, optional): Name of the source. + section (str, optional): Section of the configuration file. + max_table_nesting (int, optional): Maximum depth of nested table above which + the remaining nodes are loaded as structs or JSON. + root_key (bool, optional): Enables merging on all resources by propagating + root foreign key to child tables. This option is most useful if you + plan to change write disposition of a resource to disable/enable merge. + Defaults to False. + schema (Schema, optional): An explicit `Schema` instance to be associated + with the source. If not present, `dlt` creates a new `Schema` object + with provided `name`. If such `Schema` already exists in the same + folder as the module containing the decorated function, such schema + will be loaded from file. + schema_contract (TSchemaContract, optional): Schema contract settings + that will be applied to this resource. + spec (type[BaseConfiguration], optional): A specification of configuration + and secret values required by the source. + + Returns: + DltSource: A configured dlt source. 
+ + Example: + pokemon_source = rest_api_source({ + "client": { + "base_url": "https://pokeapi.co/api/v2/", + "paginator": "json_response", + }, + "endpoints": { + "pokemon": { + "params": { + "limit": 100, # Default page size is 20 + }, + "resource": { + "primary_key": "id", + } + }, + }, + }) + """ + decorated = dlt.source( + rest_api_resources, + name, + section, + max_table_nesting, + root_key, + schema, + schema_contract, + spec, + ) + + return decorated(config, team_id, job_id) + + +def rest_api_resources(config: RESTAPIConfig, team_id: int, job_id: str) -> list[DltResource]: + """Creates a list of resources from a REST API configuration. + + Args: + config (RESTAPIConfig): Configuration for the REST API source. + + Returns: + list[DltResource]: List of dlt resources. + + Example: + github_source = rest_api_resources({ + "client": { + "base_url": "https://api.github.com/repos/dlt-hub/dlt/", + "auth": { + "token": dlt.secrets["token"], + }, + }, + "resource_defaults": { + "primary_key": "id", + "write_disposition": "merge", + "endpoint": { + "params": { + "per_page": 100, + }, + }, + }, + "resources": [ + { + "name": "issues", + "endpoint": { + "path": "issues", + "params": { + "sort": "updated", + "direction": "desc", + "state": "open", + "since": { + "type": "incremental", + "cursor_path": "updated_at", + "initial_value": "2024-01-25T11:21:28Z", + }, + }, + }, + }, + { + "name": "issue_comments", + "endpoint": { + "path": "issues/{issue_number}/comments", + "params": { + "issue_number": { + "type": "resolve", + "resource": "issues", + "field": "number", + } + }, + }, + }, + ], + }) + """ + + validate_dict(RESTAPIConfig, config, path=".") + + client_config = config["client"] + resource_defaults = config.get("resource_defaults", {}) + resource_list = config["resources"] + + ( + dependency_graph, + endpoint_resource_map, + resolved_param_map, + ) = build_resource_dependency_graph( + resource_defaults, + resource_list, + ) + + resources = create_resources( + client_config, + dependency_graph, + endpoint_resource_map, + resolved_param_map, + team_id=team_id, + job_id=job_id, + ) + + return list(resources.values()) + + +def create_resources( + client_config: ClientConfig, + dependency_graph: graphlib.TopologicalSorter, + endpoint_resource_map: dict[str, EndpointResource], + resolved_param_map: dict[str, Optional[ResolvedParam]], + team_id: int, + job_id: str, +) -> dict[str, DltResource]: + resources = {} + + for resource_name in dependency_graph.static_order(): + resource_name = cast(str, resource_name) + endpoint_resource = endpoint_resource_map[resource_name] + endpoint_config = cast(Endpoint, endpoint_resource.get("endpoint")) + request_params = endpoint_config.get("params", {}) + request_json = endpoint_config.get("json", None) + paginator = create_paginator(endpoint_config.get("paginator")) + + resolved_param: ResolvedParam | None = resolved_param_map[resource_name] + + include_from_parent: list[str] = endpoint_resource.get("include_from_parent", []) + if not resolved_param and include_from_parent: + raise ValueError( + f"Resource {resource_name} has include_from_parent but is not " "dependent on another resource" + ) + + ( + incremental_object, + incremental_param, + ) = setup_incremental_object(request_params, endpoint_config.get("incremental")) + + client = RESTClient( + base_url=client_config.get("base_url"), + headers=client_config.get("headers"), + auth=create_auth(client_config.get("auth")), + paginator=create_paginator(client_config.get("paginator")), + ) + + hooks = 
create_response_hooks(endpoint_config.get("response_actions")) + + resource_kwargs = exclude_keys(endpoint_resource, {"endpoint", "include_from_parent"}) + + if resolved_param is None: + + async def paginate_resource( + method: HTTPMethodBasic, + path: str, + params: dict[str, Any], + json: Optional[dict[str, Any]], + paginator: Optional[BasePaginator], + data_selector: Optional[jsonpath.TJsonPath], + hooks: Optional[dict[str, Any]], + client: RESTClient = client, + incremental_object: Optional[Incremental[Any]] = incremental_object, + incremental_param: IncrementalParam | None = incremental_param, + ) -> AsyncGenerator[Iterator[Any], Any]: + yield dlt.mark.materialize_table_schema() # type: ignore + + if await is_job_cancelled(team_id=team_id, job_id=job_id): + return + + if incremental_object and incremental_param: + params[incremental_param.start] = incremental_object.last_value + if incremental_param.end: + params[incremental_param.end] = incremental_object.end_value + + yield client.paginate( + method=method, + path=path, + params=params, + json=json, + paginator=paginator, + data_selector=data_selector, + hooks=hooks, + ) + + resources[resource_name] = dlt.resource( + paginate_resource, + **resource_kwargs, # TODO: implement typing.Unpack + )( + method=endpoint_config.get("method", "get"), + path=endpoint_config.get("path"), + params=request_params, + json=request_json, + paginator=paginator, + data_selector=endpoint_config.get("data_selector"), + hooks=hooks, + ) + + else: + predecessor = resources[resolved_param.resolve_config["resource"]] + + base_params = exclude_keys(request_params, {resolved_param.param_name}) + + async def paginate_dependent_resource( + items: list[dict[str, Any]], + method: HTTPMethodBasic, + path: str, + params: dict[str, Any], + paginator: Optional[BasePaginator], + data_selector: Optional[jsonpath.TJsonPath], + hooks: Optional[dict[str, Any]], + client: RESTClient = client, + resolved_param: ResolvedParam = resolved_param, + include_from_parent: list[str] = include_from_parent, + incremental_object: Optional[Incremental[Any]] = incremental_object, + incremental_param: IncrementalParam | None = incremental_param, + ) -> AsyncGenerator[Any, Any]: + yield dlt.mark.materialize_table_schema() # type: ignore + + if await is_job_cancelled(team_id=team_id, job_id=job_id): + return + + if incremental_object and incremental_param: + params[incremental_param.start] = incremental_object.last_value + if incremental_param.end: + params[incremental_param.end] = incremental_object.end_value + + for item in items: + formatted_path, parent_record = process_parent_data_item( + path, item, resolved_param, include_from_parent + ) + + for child_page in client.paginate( + method=method, + path=formatted_path, + params=params, + paginator=paginator, + data_selector=data_selector, + hooks=hooks, + ): + if parent_record: + for child_record in child_page: + child_record.update(parent_record) + yield child_page + + resources[resource_name] = dlt.resource( # type: ignore[call-overload] + paginate_dependent_resource, + data_from=predecessor, + **resource_kwargs, # TODO: implement typing.Unpack + )( + method=endpoint_config.get("method", "get"), + path=endpoint_config.get("path"), + params=base_params, + paginator=paginator, + data_selector=endpoint_config.get("data_selector"), + hooks=hooks, + ) + + return resources + + +# XXX: This is a workaround pass test_dlt_init.py +# since the source uses dlt.source as a function +def _register_source(source_func: Callable[..., DltSource]) -> 
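A rough sketch of the cancellation short-circuit the generators above rely on, which replaces the old CHUNK_SIZE/check_limit accounting; fetch_pages and the client argument are hypothetical stand-ins for the real paginate_resource closure:

from posthog.temporal.data_imports.pipelines.helpers import is_job_cancelled

async def fetch_pages(client, path, params, team_id: int, job_id: str):
    # Before requesting anything, bail out if the ExternalDataJob was cancelled.
    if await is_job_cancelled(team_id=team_id, job_id=job_id):
        return
    # The real resource yields dlt.mark.materialize_table_schema() first, then
    # hands the whole paginated iterator to dlt rather than individual pages.
    yield client.paginate(path=path, params=params)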
None: + import inspect + from dlt.common.configuration import get_fun_spec + from dlt.common.source import _SOURCES, SourceInfo + + spec = get_fun_spec(source_func) + func_module = inspect.getmodule(source_func) + _SOURCES[source_func.__name__] = SourceInfo( + SPEC=spec, + f=source_func, + module=func_module, + ) + + +_register_source(rest_api_source) diff --git a/posthog/temporal/data_imports/pipelines/rest_source/config_setup.py b/posthog/temporal/data_imports/pipelines/rest_source/config_setup.py new file mode 100644 index 00000000000000..9eda391449d312 --- /dev/null +++ b/posthog/temporal/data_imports/pipelines/rest_source/config_setup.py @@ -0,0 +1,455 @@ +from copy import copy +from typing import ( + Any, + Optional, + cast, + NamedTuple, +) +from collections.abc import Callable +import graphlib # type: ignore[import,unused-ignore] +import string + +import dlt +from dlt.common import logger +from dlt.common.configuration import resolve_configuration +from dlt.common.schema.utils import merge_columns +from dlt.common.utils import update_dict_nested +from dlt.common import jsonpath + +from dlt.extract.incremental import Incremental +from dlt.extract.utils import ensure_table_schema_columns + +from dlt.sources.helpers.requests import Response +from dlt.sources.helpers.rest_client.paginators import ( + BasePaginator, + SinglePagePaginator, + HeaderLinkPaginator, + JSONResponsePaginator, + JSONResponseCursorPaginator, + OffsetPaginator, + PageNumberPaginator, +) +from dlt.sources.helpers.rest_client.detector import single_entity_path +from dlt.sources.helpers.rest_client.exceptions import IgnoreResponseException +from dlt.sources.helpers.rest_client.auth import ( + AuthConfigBase, + HttpBasicAuth, + BearerTokenAuth, + APIKeyAuth, +) + +from .typing import ( + EndpointResourceBase, + PaginatorType, + AuthType, + AuthConfig, + IncrementalArgs, + IncrementalConfig, + PaginatorConfig, + ResolvedParam, + ResponseAction, + Endpoint, + EndpointResource, +) +from .utils import exclude_keys + + +PAGINATOR_MAP: dict[PaginatorType, type[BasePaginator]] = { + "json_response": JSONResponsePaginator, + "header_link": HeaderLinkPaginator, + "auto": None, + "single_page": SinglePagePaginator, + "cursor": JSONResponseCursorPaginator, + "offset": OffsetPaginator, + "page_number": PageNumberPaginator, +} + +AUTH_MAP: dict[AuthType, type[AuthConfigBase]] = { + "bearer": BearerTokenAuth, + "api_key": APIKeyAuth, + "http_basic": HttpBasicAuth, +} + + +class IncrementalParam(NamedTuple): + start: str + end: Optional[str] + + +def get_paginator_class(paginator_type: PaginatorType) -> type[BasePaginator]: + try: + return PAGINATOR_MAP[paginator_type] + except KeyError: + available_options = ", ".join(PAGINATOR_MAP.keys()) + raise ValueError(f"Invalid paginator: {paginator_type}. " f"Available options: {available_options}") + + +def create_paginator( + paginator_config: Optional[PaginatorConfig], +) -> Optional[BasePaginator]: + if isinstance(paginator_config, BasePaginator): + return paginator_config + + if isinstance(paginator_config, str): + paginator_class = get_paginator_class(paginator_config) + try: + # `auto` has no associated class in `PAGINATOR_MAP` + return paginator_class() if paginator_class else None + except TypeError: + raise ValueError( + f"Paginator {paginator_config} requires arguments to create an instance. Use {paginator_class} instance instead." 
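To illustrate how the paginator shorthands resolve (a sketch using the helpers above; the dict form is handled by the branch that continues just below):

from dlt.sources.helpers.rest_client.paginators import HeaderLinkPaginator, OffsetPaginator

# String configs map straight onto a paginator class with default arguments.
assert isinstance(create_paginator("header_link"), HeaderLinkPaginator)
# "auto" has no associated class and returns None, deferring detection to the RESTClient.
assert create_paginator("auto") is None
# Dict configs pass their remaining keys to the class constructor.
assert isinstance(create_paginator({"type": "offset", "limit": 100}), OffsetPaginator)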
+ ) + + if isinstance(paginator_config, dict): + paginator_type = paginator_config.get("type", "auto") + paginator_class = get_paginator_class(paginator_type) + return paginator_class(**exclude_keys(paginator_config, {"type"})) if paginator_class else None + + return None + + +def get_auth_class(auth_type: AuthType) -> type[AuthConfigBase]: + try: + return AUTH_MAP[auth_type] + except KeyError: + available_options = ", ".join(AUTH_MAP.keys()) + raise ValueError(f"Invalid auth: {auth_type}. " f"Available options: {available_options}") + + +def create_auth(auth_config: Optional[AuthConfig]) -> Optional[AuthConfigBase]: + auth: Optional[AuthConfigBase] = None + if isinstance(auth_config, AuthConfigBase): + auth = auth_config + + if isinstance(auth_config, str): + auth_class = get_auth_class(auth_config) + auth = auth_class() + + if isinstance(auth_config, dict): + auth_type = auth_config.get("type", "bearer") + auth_class = get_auth_class(auth_type) + auth = auth_class(**exclude_keys(auth_config, {"type"})) + + if auth: + # TODO: provide explicit (non-default) values as explicit_value=dict(auth) + # this will resolve auth which is a configuration using current section context + return resolve_configuration(auth) + + return None + + +def setup_incremental_object( + request_params: dict[str, Any], + incremental_config: Optional[IncrementalConfig] = None, +) -> tuple[Optional[Incremental[Any]], Optional[IncrementalParam]]: + for key, value in request_params.items(): + if isinstance(value, dlt.sources.incremental): + return value, IncrementalParam(start=key, end=None) + if isinstance(value, dict) and value.get("type") == "incremental": + config = exclude_keys(value, {"type"}) + # TODO: implement param type to bind incremental to + return ( + dlt.sources.incremental(**config), + IncrementalParam(start=key, end=None), + ) + if incremental_config: + config = exclude_keys(incremental_config, {"start_param", "end_param"}) + return ( + dlt.sources.incremental(**cast(IncrementalArgs, config)), + IncrementalParam( + start=incremental_config["start_param"], + end=incremental_config.get("end_param"), + ), + ) + + return None, None + + +def make_parent_key_name(resource_name: str, field_name: str) -> str: + return f"_{resource_name}_{field_name}" + + +def build_resource_dependency_graph( + resource_defaults: EndpointResourceBase, + resource_list: list[str | EndpointResource], +) -> tuple[Any, dict[str, EndpointResource], dict[str, Optional[ResolvedParam]]]: + dependency_graph = graphlib.TopologicalSorter() + endpoint_resource_map: dict[str, EndpointResource] = {} + resolved_param_map: dict[str, Optional[ResolvedParam]] = {} + + # expand all resources and index them + for resource_kwargs in resource_list: + if isinstance(resource_kwargs, dict): + # clone resource here, otherwise it needs to be cloned in several other places + # note that this clones only dict structure, keeping all instances without deepcopy + resource_kwargs = update_dict_nested({}, resource_kwargs) # type: ignore[assignment] + + endpoint_resource = _make_endpoint_resource(resource_kwargs, resource_defaults) + assert isinstance(endpoint_resource["endpoint"], dict) + _setup_single_entity_endpoint(endpoint_resource["endpoint"]) + _bind_path_params(endpoint_resource) + + resource_name = endpoint_resource["name"] + assert isinstance(resource_name, str), f"Resource name must be a string, got {type(resource_name)}" + + if resource_name in endpoint_resource_map: + raise ValueError(f"Resource {resource_name} has already been defined") + 
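A sketch of what setup_incremental_object (defined above) returns for the `since` parameter from the GitHub example in the module docstring; the values are illustrative only:

params = {
    "sort": "updated",
    "since": {
        "type": "incremental",
        "cursor_path": "updated_at",
        "initial_value": "2024-01-25T11:21:28Z",
    },
}
incremental_object, incremental_param = setup_incremental_object(params)
# The plain "sort" param is ignored; the "incremental" dict becomes a
# dlt.sources.incremental instance bound to the "since" query parameter.
assert incremental_param == IncrementalParam(start="since", end=None)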
endpoint_resource_map[resource_name] = endpoint_resource + + # create dependency graph + for resource_name, endpoint_resource in endpoint_resource_map.items(): + assert isinstance(endpoint_resource["endpoint"], dict) + # connect transformers to resources via resolved params + resolved_params = _find_resolved_params(endpoint_resource["endpoint"]) + if len(resolved_params) > 1: + raise ValueError(f"Multiple resolved params for resource {resource_name}: {resolved_params}") + elif len(resolved_params) == 1: + resolved_param = resolved_params[0] + predecessor = resolved_param.resolve_config["resource"] + if predecessor not in endpoint_resource_map: + raise ValueError( + f"A transformer resource {resource_name} refers to non existing parent resource {predecessor} on {resolved_param}" + ) + dependency_graph.add(resource_name, predecessor) + resolved_param_map[resource_name] = resolved_param + else: + dependency_graph.add(resource_name) + resolved_param_map[resource_name] = None + + return dependency_graph, endpoint_resource_map, resolved_param_map + + +def _make_endpoint_resource(resource: str | EndpointResource, default_config: EndpointResourceBase) -> EndpointResource: + """ + Creates an EndpointResource object based on the provided resource + definition and merges it with the default configuration. + + This function supports defining a resource in multiple formats: + - As a string: The string is interpreted as both the resource name + and its endpoint path. + - As a dictionary: The dictionary must include `name` and `endpoint` + keys. The `endpoint` can be a string representing the path, + or a dictionary for more complex configurations. If the `endpoint` + is missing the `path` key, the resource name is used as the `path`. + """ + if isinstance(resource, str): + resource = {"name": resource, "endpoint": {"path": resource}} + return _merge_resource_endpoints(default_config, resource) + + if "endpoint" in resource: + if isinstance(resource["endpoint"], str): + resource["endpoint"] = {"path": resource["endpoint"]} + else: + # endpoint is optional + resource["endpoint"] = {} + + if "path" not in resource["endpoint"]: + resource["endpoint"]["path"] = resource["name"] # type: ignore + + return _merge_resource_endpoints(default_config, resource) + + +def _bind_path_params(resource: EndpointResource) -> None: + """Binds params declared in path to params available in `params`. Pops the + bound params but. Params of type `resolve` and `incremental` are skipped + and bound later. + """ + path_params: dict[str, Any] = {} + assert isinstance(resource["endpoint"], dict) # type guard + resolve_params = [r.param_name for r in _find_resolved_params(resource["endpoint"])] + path = resource["endpoint"]["path"] + for format_ in string.Formatter().parse(path): + name = format_[1] + if name: + params = resource["endpoint"].get("params", {}) + if name not in params and name not in path_params: + raise ValueError( + f"The path {path} defined in resource {resource['name']} requires param with name {name} but it is not found in {params}" + ) + if name in resolve_params: + resolve_params.remove(name) + if name in params: + if not isinstance(params[name], dict): + # bind resolved param and pop it from endpoint + path_params[name] = params.pop(name) + else: + param_type = params[name].get("type") + if param_type != "resolve": + raise ValueError( + f"The path {path} defined in resource {resource['name']} tries to bind param {name} with type {param_type}. Paths can only bind 'resource' type params." 
+ ) + # resolved params are bound later + path_params[name] = "{" + name + "}" + + if len(resolve_params) > 0: + raise NotImplementedError( + f"Resource {resource['name']} defines resolve params {resolve_params} that are not bound in path {path}. Resolve query params not supported yet." + ) + + resource["endpoint"]["path"] = path.format(**path_params) + + +def _setup_single_entity_endpoint(endpoint: Endpoint) -> Endpoint: + """Tries to guess if the endpoint refers to a single entity and when detected: + * if `data_selector` was not specified (or is None), "$" is selected + * if `paginator` was not specified (or is None), SinglePagePaginator is selected + + Endpoint is modified in place and returned + """ + # try to guess if list of entities or just single entity is returned + if single_entity_path(endpoint["path"]): + if endpoint.get("data_selector") is None: + endpoint["data_selector"] = "$" + if endpoint.get("paginator") is None: + endpoint["paginator"] = SinglePagePaginator() + return endpoint + + +def _find_resolved_params(endpoint_config: Endpoint) -> list[ResolvedParam]: + """ + Find all resolved params in the endpoint configuration and return + a list of ResolvedParam objects. + + Resolved params are of type ResolveParamConfig (bound param with a key "type" set to "resolve".) + """ + return [ + ResolvedParam(key, value) # type: ignore[arg-type] + for key, value in endpoint_config.get("params", {}).items() + if (isinstance(value, dict) and value.get("type") == "resolve") + ] + + +def _handle_response_actions(response: Response, actions: list[ResponseAction]) -> Optional[str]: + """Handle response actions based on the response and the provided actions.""" + content = response.text + + for action in actions: + status_code = action.get("status_code") + content_substr: str = action.get("content") + action_type: str = action.get("action") + + if status_code is not None and content_substr is not None: + if response.status_code == status_code and content_substr in content: + return action_type + + elif status_code is not None: + if response.status_code == status_code: + return action_type + + elif content_substr is not None: + if content_substr in content: + return action_type + + return None + + +def _create_response_actions_hook( + response_actions: list[ResponseAction], +) -> Callable[[Response, Any, Any], None]: + def response_actions_hook(response: Response, *args: Any, **kwargs: Any) -> None: + action_type = _handle_response_actions(response, response_actions) + if action_type == "ignore": + logger.info(f"Ignoring response with code {response.status_code} " f"and content '{response.json()}'.") + raise IgnoreResponseException + + # If no action has been taken and the status code indicates an error, + # raise an HTTP error based on the response status + if not action_type and response.status_code >= 400: + response.raise_for_status() + + return response_actions_hook + + +def create_response_hooks( + response_actions: Optional[list[ResponseAction]], +) -> Optional[dict[str, Any]]: + """Create response hooks based on the provided response actions. Note + that if the error status code is not handled by the response actions, + the default behavior is to raise an HTTP error. 
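A sketch of the single-entity defaulting above, assuming dlt's single_entity_path treats a path ending in a placeholder as a single-record endpoint:

endpoint: Endpoint = {"path": "customers/{customer_id}"}
_setup_single_entity_endpoint(endpoint)
# With nothing to paginate, the whole body is selected and a single-page
# paginator is installed.
assert endpoint["data_selector"] == "$"
assert isinstance(endpoint["paginator"], SinglePagePaginator)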
+ + Example: + response_actions = [ + {"status_code": 404, "action": "ignore"}, + {"content": "Not found", "action": "ignore"}, + {"status_code": 429, "action": "retry"}, + {"status_code": 200, "content": "some text", "action": "retry"}, + ] + hooks = create_response_hooks(response_actions) + """ + if response_actions: + return {"response": [_create_response_actions_hook(response_actions)]} + return None + + +def process_parent_data_item( + path: str, + item: dict[str, Any], + resolved_param: ResolvedParam, + include_from_parent: list[str], +) -> tuple[str, dict[str, Any]]: + parent_resource_name = resolved_param.resolve_config["resource"] + + field_values = jsonpath.find_values(resolved_param.field_path, item) + + if not field_values: + field_path = resolved_param.resolve_config["field"] + raise ValueError( + f"Transformer expects a field '{field_path}' to be present in the incoming data from resource {parent_resource_name} in order to bind it to path param {resolved_param.param_name}. Available parent fields are {', '.join(item.keys())}" + ) + bound_path = path.format(**{resolved_param.param_name: field_values[0]}) + + parent_record: dict[str, Any] = {} + if include_from_parent: + for parent_key in include_from_parent: + child_key = make_parent_key_name(parent_resource_name, parent_key) + if parent_key not in item: + raise ValueError( + f"Transformer expects a field '{parent_key}' to be present in the incoming data from resource {parent_resource_name} in order to include it in child records under {child_key}. Available parent fields are {', '.join(item.keys())}" + ) + parent_record[child_key] = item[parent_key] + + return bound_path, parent_record + + +def _merge_resource_endpoints(default_config: EndpointResourceBase, config: EndpointResource) -> EndpointResource: + """Merges `default_config` and `config`, returns new instance of EndpointResource""" + # NOTE: config is normalized and always has "endpoint" field which is a dict + # TODO: could deep merge paginators and auths of the same type + + default_endpoint = default_config.get("endpoint", Endpoint()) + assert isinstance(default_endpoint, dict) + config_endpoint = config["endpoint"] + assert isinstance(config_endpoint, dict) + + merged_endpoint: Endpoint = { + **default_endpoint, + **{k: v for k, v in config_endpoint.items() if k not in ("json", "params")}, # type: ignore[typeddict-item] + } + # merge endpoint, only params and json are allowed to deep merge + if "json" in config_endpoint: + merged_endpoint["json"] = { + **(merged_endpoint.get("json", {})), + **config_endpoint["json"], + } + if "params" in config_endpoint: + merged_endpoint["params"] = { + **(merged_endpoint.get("params", {})), + **config_endpoint["params"], + } + # merge columns + if (default_columns := default_config.get("columns")) and (columns := config.get("columns")): + # merge only native dlt formats, skip pydantic and others + if isinstance(columns, list | dict) and isinstance(default_columns, list | dict): + # normalize columns + columns = ensure_table_schema_columns(columns) + default_columns = ensure_table_schema_columns(default_columns) + # merge columns with deep merging hints + config["columns"] = merge_columns(copy(default_columns), columns, merge_columns=True) + + # no need to deep merge resources + merged_resource: EndpointResource = { + **default_config, + **config, + "endpoint": merged_endpoint, + } + return merged_resource diff --git a/posthog/temporal/data_imports/pipelines/rest_source/exceptions.py 
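To make the transformer wiring concrete, a sketch of process_parent_data_item using the issues/comments example from the module docstring; the record values are invented for illustration:

resolved = ResolvedParam(
    "issue_number",
    {"type": "resolve", "resource": "issues", "field": "number"},
)
bound_path, parent_record = process_parent_data_item(
    path="issues/{issue_number}/comments",
    item={"id": 101, "number": 42},
    resolved_param=resolved,
    include_from_parent=["id"],
)
# The parent's "number" fills the path placeholder, and the included "id"
# is exposed to child records under a prefixed column name.
assert bound_path == "issues/42/comments"
assert parent_record == {"_issues_id": 101}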
b/posthog/temporal/data_imports/pipelines/rest_source/exceptions.py new file mode 100644 index 00000000000000..93e807d29b9fbc --- /dev/null +++ b/posthog/temporal/data_imports/pipelines/rest_source/exceptions.py @@ -0,0 +1,5 @@ +from dlt.common.exceptions import DltException + + +class RestApiException(DltException): + pass diff --git a/posthog/temporal/data_imports/pipelines/rest_source/typing.py b/posthog/temporal/data_imports/pipelines/rest_source/typing.py new file mode 100644 index 00000000000000..4a28912ccb2380 --- /dev/null +++ b/posthog/temporal/data_imports/pipelines/rest_source/typing.py @@ -0,0 +1,254 @@ +from typing import ( + Any, + Literal, + Optional, + TypedDict, +) +from dataclasses import dataclass, field + +from dlt.common import jsonpath +from dlt.common.typing import TSortOrder +from dlt.common.schema.typing import ( + TColumnNames, + TTableFormat, + TAnySchemaColumns, + TWriteDispositionConfig, + TSchemaContract, +) + +from dlt.extract.items import TTableHintTemplate +from dlt.extract.incremental.typing import LastValueFunc + +from dlt.sources.helpers.rest_client.paginators import BasePaginator +from dlt.sources.helpers.rest_client.typing import HTTPMethodBasic +from dlt.sources.helpers.rest_client.auth import AuthConfigBase, TApiKeyLocation + +from dlt.sources.helpers.rest_client.paginators import ( + SinglePagePaginator, + HeaderLinkPaginator, + JSONResponsePaginator, + JSONResponseCursorPaginator, + OffsetPaginator, + PageNumberPaginator, +) +from dlt.sources.helpers.rest_client.auth import ( + HttpBasicAuth, + BearerTokenAuth, + APIKeyAuth, +) + +PaginatorType = Literal[ + "json_response", + "header_link", + "auto", + "single_page", + "cursor", + "offset", + "page_number", +] + + +class PaginatorTypeConfig(TypedDict, total=True): + type: PaginatorType # noqa + + +class PageNumberPaginatorConfig(PaginatorTypeConfig, total=False): + """A paginator that uses page number-based pagination strategy.""" + + initial_page: Optional[int] + page_param: Optional[str] + total_path: Optional[jsonpath.TJsonPath] + maximum_page: Optional[int] + + +class OffsetPaginatorConfig(PaginatorTypeConfig, total=False): + """A paginator that uses offset-based pagination strategy.""" + + limit: int + offset: Optional[int] + offset_param: Optional[str] + limit_param: Optional[str] + total_path: Optional[jsonpath.TJsonPath] + maximum_offset: Optional[int] + + +class HeaderLinkPaginatorConfig(PaginatorTypeConfig, total=False): + """A paginator that uses the 'Link' header in HTTP responses + for pagination.""" + + links_next_key: Optional[str] + + +class JSONResponsePaginatorConfig(PaginatorTypeConfig, total=False): + """Locates the next page URL within the JSON response body. 
The key + containing the URL can be specified using a JSON path.""" + + next_url_path: Optional[jsonpath.TJsonPath] + + +class JSONResponseCursorPaginatorConfig(PaginatorTypeConfig, total=False): + """Uses a cursor parameter for pagination, with the cursor value found in + the JSON response body.""" + + cursor_path: Optional[jsonpath.TJsonPath] + cursor_param: Optional[str] + + +PaginatorConfig = ( + PaginatorType + | PageNumberPaginatorConfig + | OffsetPaginatorConfig + | HeaderLinkPaginatorConfig + | JSONResponsePaginatorConfig + | JSONResponseCursorPaginatorConfig + | BasePaginator + | SinglePagePaginator + | HeaderLinkPaginator + | JSONResponsePaginator + | JSONResponseCursorPaginator + | OffsetPaginator + | PageNumberPaginator +) + + +AuthType = Literal["bearer", "api_key", "http_basic"] + + +class AuthTypeConfig(TypedDict, total=True): + type: AuthType # noqa + + +class BearerTokenAuthConfig(TypedDict, total=False): + """Uses `token` for Bearer authentication in "Authorization" header.""" + + # we allow for a shorthand form of bearer auth, without a type + type: Optional[AuthType] # noqa + token: str + + +class ApiKeyAuthConfig(AuthTypeConfig, total=False): + """Uses provided `api_key` to create authorization data in the specified `location` (query, param, header, cookie) under specified `name`""" + + name: Optional[str] + api_key: str + location: Optional[TApiKeyLocation] + + +class HttpBasicAuthConfig(AuthTypeConfig, total=True): + """Uses HTTP basic authentication""" + + username: str + password: str + + +# TODO: add later +# class OAuthJWTAuthConfig(AuthTypeConfig, total=True): + + +AuthConfig = ( + AuthConfigBase + | AuthType + | BearerTokenAuthConfig + | ApiKeyAuthConfig + | HttpBasicAuthConfig + | BearerTokenAuth + | APIKeyAuth + | HttpBasicAuth +) + + +class ClientConfig(TypedDict, total=False): + base_url: str + headers: Optional[dict[str, str]] + auth: Optional[AuthConfig] + paginator: Optional[PaginatorConfig] + + +class IncrementalArgs(TypedDict, total=False): + cursor_path: str + initial_value: Optional[str] + last_value_func: Optional[LastValueFunc[str]] + primary_key: Optional[TTableHintTemplate[TColumnNames]] + end_value: Optional[str] + row_order: Optional[TSortOrder] + + +class IncrementalConfig(IncrementalArgs, total=False): + start_param: str + end_param: Optional[str] + + +ParamBindType = Literal["resolve", "incremental"] + + +class ParamBindConfig(TypedDict): + type: ParamBindType # noqa + + +class ResolveParamConfig(ParamBindConfig): + resource: str + field: str + + +class IncrementalParamConfig(ParamBindConfig, IncrementalArgs): + pass + # TODO: implement param type to bind incremental to + # param_type: Optional[Literal["start_param", "end_param"]] + + +@dataclass +class ResolvedParam: + param_name: str + resolve_config: ResolveParamConfig + field_path: jsonpath.TJsonPath = field(init=False) + + def __post_init__(self) -> None: + self.field_path = jsonpath.compile_path(self.resolve_config["field"]) + + +class ResponseAction(TypedDict, total=False): + status_code: Optional[int | str] + content: Optional[str] + action: str + + +class Endpoint(TypedDict, total=False): + path: Optional[str] + method: Optional[HTTPMethodBasic] + params: Optional[dict[str, ResolveParamConfig | IncrementalParamConfig | Any]] + json: Optional[dict[str, Any]] + paginator: Optional[PaginatorConfig] + data_selector: Optional[jsonpath.TJsonPath] + response_actions: Optional[list[ResponseAction]] + incremental: Optional[IncrementalConfig] + + +class ResourceBase(TypedDict, total=False): + 
"""Defines hints that may be passed to `dlt.resource` decorator""" + + table_name: Optional[TTableHintTemplate[str]] + max_table_nesting: Optional[int] + write_disposition: Optional[TTableHintTemplate[TWriteDispositionConfig]] + parent: Optional[TTableHintTemplate[str]] + columns: Optional[TTableHintTemplate[TAnySchemaColumns]] + primary_key: Optional[TTableHintTemplate[TColumnNames]] + merge_key: Optional[TTableHintTemplate[TColumnNames]] + schema_contract: Optional[TTableHintTemplate[TSchemaContract]] + table_format: Optional[TTableHintTemplate[TTableFormat]] + selected: Optional[bool] + parallelized: Optional[bool] + + +class EndpointResourceBase(ResourceBase, total=False): + endpoint: Optional[str | Endpoint] + include_from_parent: Optional[list[str]] + + +class EndpointResource(EndpointResourceBase, total=False): + name: TTableHintTemplate[str] + + +class RESTAPIConfig(TypedDict): + client: ClientConfig + resource_defaults: Optional[EndpointResourceBase] + resources: list[str | EndpointResource] diff --git a/posthog/temporal/data_imports/pipelines/rest_source/utils.py b/posthog/temporal/data_imports/pipelines/rest_source/utils.py new file mode 100644 index 00000000000000..91eca3cf480049 --- /dev/null +++ b/posthog/temporal/data_imports/pipelines/rest_source/utils.py @@ -0,0 +1,36 @@ +from typing import Any +from collections.abc import Mapping, Iterable + +from dlt.common import logger +from dlt.extract.source import DltSource + + +def join_url(base_url: str, path: str) -> str: + if not base_url.endswith("/"): + base_url += "/" + return base_url + path.lstrip("/") + + +def exclude_keys(d: Mapping[str, Any], keys: Iterable[str]) -> dict[str, Any]: + """Removes specified keys from a dictionary and returns a new dictionary. + + Args: + d (Mapping[str, Any]): The dictionary to remove keys from. + keys (Iterable[str]): The keys to remove. + + Returns: + Dict[str, Any]: A new dictionary with the specified keys removed. 
+ """ + return {k: v for k, v in d.items() if k not in keys} + + +def check_connection( + source: DltSource, + *resource_names: str, +) -> tuple[bool, str]: + try: + list(source.with_resources(*resource_names).add_limit(1)) + return (True, "") + except Exception as e: + logger.error(f"Error checking connection: {e}") + return (False, str(e)) diff --git a/posthog/temporal/data_imports/pipelines/stripe/__init__.py b/posthog/temporal/data_imports/pipelines/stripe/__init__.py index 228e94778e6890..d74c04ca5e7dc6 100644 --- a/posthog/temporal/data_imports/pipelines/stripe/__init__.py +++ b/posthog/temporal/data_imports/pipelines/stripe/__init__.py @@ -3,15 +3,17 @@ from dlt.sources.helpers.requests import Response, Request from posthog.temporal.data_imports.pipelines.rest_source import RESTAPIConfig, rest_api_resources from posthog.temporal.data_imports.pipelines.rest_source.typing import EndpointResource +from posthog.warehouse.models.external_table_definitions import get_dlt_mapping_for_external_table def get_resource(name: str, is_incremental: bool) -> EndpointResource: resources: dict[str, EndpointResource] = { "BalanceTransaction": { "name": "BalanceTransaction", - "table_name": "balance_transaction", + "table_name": "balancetransaction", "primary_key": "id", "write_disposition": "merge", + "columns": get_dlt_mapping_for_external_table("stripe_balancetransaction"), # type: ignore "endpoint": { "data_selector": "data", "path": "/v1/balance_transactions", @@ -34,6 +36,7 @@ def get_resource(name: str, is_incremental: bool) -> EndpointResource: "table_name": "charge", "primary_key": "id", "write_disposition": "merge", + "columns": get_dlt_mapping_for_external_table("stripe_charge"), # type: ignore "endpoint": { "data_selector": "data", "path": "/v1/charges", @@ -55,6 +58,7 @@ def get_resource(name: str, is_incremental: bool) -> EndpointResource: "table_name": "customer", "primary_key": "id", "write_disposition": "merge", + "columns": get_dlt_mapping_for_external_table("stripe_customer"), # type: ignore "endpoint": { "data_selector": "data", "path": "/v1/customers", @@ -75,6 +79,7 @@ def get_resource(name: str, is_incremental: bool) -> EndpointResource: "table_name": "invoice", "primary_key": "id", "write_disposition": "merge", + "columns": get_dlt_mapping_for_external_table("stripe_invoice"), # type: ignore "endpoint": { "data_selector": "data", "path": "/v1/invoices", @@ -104,6 +109,7 @@ def get_resource(name: str, is_incremental: bool) -> EndpointResource: "table_name": "price", "primary_key": "id", "write_disposition": "merge", + "columns": get_dlt_mapping_for_external_table("stripe_price"), # type: ignore "endpoint": { "data_selector": "data", "path": "/v1/prices", @@ -128,6 +134,7 @@ def get_resource(name: str, is_incremental: bool) -> EndpointResource: "table_name": "product", "primary_key": "id", "write_disposition": "merge", + "columns": get_dlt_mapping_for_external_table("stripe_product"), # type: ignore "endpoint": { "data_selector": "data", "path": "/v1/products", @@ -150,6 +157,7 @@ def get_resource(name: str, is_incremental: bool) -> EndpointResource: "table_name": "subscription", "primary_key": "id", "write_disposition": "merge", + "columns": get_dlt_mapping_for_external_table("stripe_subscription"), # type: ignore "endpoint": { "data_selector": "data", "path": "/v1/subscriptions", @@ -201,7 +209,9 @@ def update_request(self, request: Request) -> None: @dlt.source(max_table_nesting=0) -def stripe_source(api_key: str, account_id: str, endpoint: str, is_incremental: bool = False): 
+def stripe_source( + api_key: str, account_id: str, endpoint: str, team_id: int, job_id: str, is_incremental: bool = False +): config: RESTAPIConfig = { "client": { "base_url": "https://api.stripe.com/", @@ -222,4 +232,4 @@ def stripe_source(api_key: str, account_id: str, endpoint: str, is_incremental: "resources": [get_resource(endpoint, is_incremental)], } - yield from rest_api_resources(config) + yield from rest_api_resources(config, team_id, job_id) diff --git a/posthog/temporal/data_imports/pipelines/test/test_pipeline.py b/posthog/temporal/data_imports/pipelines/test/test_pipeline.py index 23fc37c6d80f8a..435bcda33a9c5a 100644 --- a/posthog/temporal/data_imports/pipelines/test/test_pipeline.py +++ b/posthog/temporal/data_imports/pipelines/test/test_pipeline.py @@ -49,7 +49,14 @@ async def _create_pipeline(self, schema_name: str, incremental: bool): job_type="Stripe", team_id=self.team.pk, ), - source=stripe_source(api_key="", account_id="", endpoint=schema_name, is_incremental=False), + source=stripe_source( + api_key="", + account_id="", + endpoint=schema_name, + is_incremental=False, + team_id=self.team.pk, + job_id=str(job.pk), + ), logger=structlog.get_logger(), incremental=incremental, ) diff --git a/posthog/temporal/data_imports/workflow_activities/import_data.py b/posthog/temporal/data_imports/workflow_activities/import_data.py index 9062d389415ed9..f622c42bf7e8a7 100644 --- a/posthog/temporal/data_imports/workflow_activities/import_data.py +++ b/posthog/temporal/data_imports/workflow_activities/import_data.py @@ -62,9 +62,13 @@ async def import_data_activity(inputs: ImportDataActivityInputs) -> tuple[TSchem if not stripe_secret_key: raise ValueError(f"Stripe secret key not found for job {model.id}") - # TODO: add in check_limit to rest_source source = stripe_source( - api_key=stripe_secret_key, account_id=account_id, endpoint=schema.name, is_incremental=schema.is_incremental + api_key=stripe_secret_key, + account_id=account_id, + endpoint=schema.name, + team_id=inputs.team_id, + job_id=inputs.run_id, + is_incremental=schema.is_incremental, ) return await _run(job_inputs=job_inputs, source=source, logger=logger, inputs=inputs, schema=schema) diff --git a/posthog/temporal/tests/external_data/test_external_data_job.py b/posthog/temporal/tests/external_data/test_external_data_job.py index 33363e24d5854c..c05cf9c181dca4 100644 --- a/posthog/temporal/tests/external_data/test_external_data_job.py +++ b/posthog/temporal/tests/external_data/test_external_data_job.py @@ -493,7 +493,7 @@ def mock_customers_paginate( assert len(job_1_customer_objects["Contents"]) == 1 await sync_to_async(job_1.refresh_from_db)() - assert job_1.rows_synced == 1 + assert job_1.rows_synced == 0 @pytest.mark.django_db(transaction=True) @@ -554,7 +554,6 @@ def mock_customers_paginate( with ( mock.patch.object(RESTClient, "paginate", mock_customers_paginate), - mock.patch("posthog.temporal.data_imports.pipelines.helpers.CHUNK_SIZE", 0), override_settings( BUCKET_URL=f"s3://{BUCKET_NAME}", AIRBYTE_BUCKET_KEY=settings.OBJECT_STORAGE_ACCESS_KEY_ID,