diff --git a/ee/api/test/test_hooks.py b/ee/api/test/test_hooks.py index 0017079f4a77d..3cfa9595ce1c8 100644 --- a/ee/api/test/test_hooks.py +++ b/ee/api/test/test_hooks.py @@ -139,8 +139,9 @@ def test_create_hog_function_via_hook(self): "target": "https://hooks.zapier.com/{inputs.hook}", }, }, + "order": 2, }, - "debug": {}, + "debug": {"order": 1}, "hook": { "bytecode": [ "_H", @@ -149,6 +150,7 @@ def test_create_hog_function_via_hook(self): "hooks/standard/1234/abcd", ], "value": "hooks/standard/1234/abcd", + "order": 0, }, } diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index 631f8c8438444..1b843ca04c513 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -10,6 +10,7 @@ import { status } from '../utils/status' import { HogFunctionManager } from './hog-function-manager' import { CyclotronFetchFailureInfo, + HogFunctionInputType, HogFunctionInvocation, HogFunctionInvocationGlobals, HogFunctionInvocationGlobalsWithInputs, @@ -103,6 +104,16 @@ const sanitizeLogMessage = (args: any[], sensitiveValues?: string[]): string => return message } +const orderInputsByDependency = (hogFunction: HogFunctionType): [string, HogFunctionInputType][] => { + const allInputs: HogFunctionType['inputs'] = { + ...hogFunction.inputs, + ...hogFunction.encrypted_inputs, + } + return Object.entries(allInputs).sort(([_, input1], [__, input2]) => { + return (input1.order ?? -1) - (input2.order ?? -1) + }) +} + export class HogExecutor { private telemetryMatcher: ValueMatcher @@ -529,30 +540,23 @@ export class HogExecutor { } buildHogFunctionGlobals(invocation: HogFunctionInvocation): HogFunctionInvocationGlobalsWithInputs { - const builtInputs: Record = {} + const newGlobals: HogFunctionInvocationGlobalsWithInputs = { + ...invocation.globals, + inputs: {}, + } - Object.entries(invocation.hogFunction.inputs ?? {}).forEach(([key, item]) => { - builtInputs[key] = item.value + const orderedInputs = orderInputsByDependency(invocation.hogFunction) - if (item.bytecode) { - // Use the bytecode to compile the field - builtInputs[key] = formatInput(item.bytecode, invocation.globals, key) - } - }) - - Object.entries(invocation.hogFunction.encrypted_inputs ?? {}).forEach(([key, item]) => { - builtInputs[key] = item.value + for (const [key, input] of orderedInputs) { + newGlobals.inputs[key] = input.value - if (item.bytecode) { + if (input.bytecode) { // Use the bytecode to compile the field - builtInputs[key] = formatInput(item.bytecode, invocation.globals, key) + newGlobals.inputs[key] = formatInput(input.bytecode, newGlobals, key) } - }) - - return { - ...invocation.globals, - inputs: builtInputs, } + + return newGlobals } getSensitiveValues(hogFunction: HogFunctionType, inputs: Record): string[] { diff --git a/plugin-server/src/cdp/types.ts b/plugin-server/src/cdp/types.ts index e9d506a7a7823..dfe0464a1f9ec 100644 --- a/plugin-server/src/cdp/types.ts +++ b/plugin-server/src/cdp/types.ts @@ -297,6 +297,7 @@ export type HogFunctionInputType = { value: any secret?: boolean bytecode?: HogBytecode | object + order?: number } export type IntegrationType = { diff --git a/posthog/api/hog_function.py b/posthog/api/hog_function.py index 4549f4f3a8bb5..9fa2296e95f32 100644 --- a/posthog/api/hog_function.py +++ b/posthog/api/hog_function.py @@ -79,7 +79,7 @@ class HogFunctionMaskingSerializer(serializers.Serializer): bytecode = serializers.JSONField(required=False, allow_null=True) def validate(self, attrs): - attrs["bytecode"] = generate_template_bytecode(attrs["hash"]) + attrs["bytecode"] = generate_template_bytecode(attrs["hash"], input_collector=set()) return super().validate(attrs) diff --git a/posthog/api/test/test_hog_function.py b/posthog/api/test/test_hog_function.py index b988b53fdbbfb..cea0fd8dc3f0d 100644 --- a/posthog/api/test/test_hog_function.py +++ b/posthog/api/test/test_hog_function.py @@ -456,6 +456,7 @@ def test_secret_inputs_not_returned(self, *args): "I AM SECRET", ], "value": "I AM SECRET", + "order": 0, }, } @@ -463,7 +464,7 @@ def test_secret_inputs_not_returned(self, *args): assert ( raw_encrypted_inputs - == "gAAAAABlkgC8AAAAAAAAAAAAAAAAAAAAAKvzDjuLG689YjjVhmmbXAtZSRoucXuT8VtokVrCotIx3ttPcVufoVt76dyr2phbuotMldKMVv_Y6uzMDZFjX1WLE6eeZEhBJqFv8fQacoHXhDbDh5fvL7DTr1sc2R_DmTwvPQDiSss790vZ6d_vm1Q=" + == "gAAAAABlkgC8AAAAAAAAAAAAAAAAAAAAAKvzDjuLG689YjjVhmmbXAtZSRoucXuT8VtokVrCotIx3ttPcVufoVt76dyr2phbuotMldKMVv_Y6uzMDZFjX1Uvej4GHsYRbsTN_txcQHNnU7zvLee83DhHIrThEjceoq8i7hbfKrvqjEi7GCGc_k_Gi3V5KFxDOfLKnke4KM4s" ) def test_secret_inputs_not_updated_if_not_changed(self, *args): @@ -619,6 +620,7 @@ def test_generates_inputs_bytecode(self, *args): 32, "http://localhost:2080/0e02d917-563f-4050-9725-aad881b69937", ], + "order": 0, }, "payload": { "value": { @@ -628,6 +630,7 @@ def test_generates_inputs_bytecode(self, *args): "person": "{person}", "event_url": "{f'{event.url}-test'}", }, + "order": 1, "bytecode": { "event": ["_H", HOGQL_BYTECODE_VERSION, 32, "event", 1, 1], "groups": ["_H", HOGQL_BYTECODE_VERSION, 32, "groups", 1, 1], @@ -650,7 +653,7 @@ def test_generates_inputs_bytecode(self, *args): ], }, }, - "method": {"value": "POST"}, + "method": {"value": "POST", "order": 2}, "headers": { "value": {"version": "v={event.properties.$lib_version}"}, "bytecode": { @@ -672,6 +675,7 @@ def test_generates_inputs_bytecode(self, *args): 2, ] }, + "order": 3, }, } @@ -1141,6 +1145,7 @@ def test_create_typescript_destination_with_inputs(self): inputs["message"]["transpiled"]["stl"].sort() assert result["inputs"] == { "message": { + "order": 0, "transpiled": { "code": 'concat("Hello, TypeScript ", arrayMap(__lambda((a) => a), [1, 2, 3]), "!")', "lang": "ts", diff --git a/posthog/cdp/site_functions.py b/posthog/cdp/site_functions.py index fa77c20a8f881..e02895f39d99a 100644 --- a/posthog/cdp/site_functions.py +++ b/posthog/cdp/site_functions.py @@ -20,8 +20,9 @@ def get_transpiled_function(hog_function: HogFunction) -> str: compiler = JavaScriptCompiler() - # TODO: reorder inputs to make dependencies work - for key, input in (hog_function.inputs or {}).items(): + all_inputs = hog_function.inputs or {} + all_inputs = sorted(all_inputs.items(), key=lambda x: x[1].get("order", -1)) + for key, input in all_inputs: value = input.get("value") key_string = json.dumps(str(key) or "") if (isinstance(value, str) and "{" in value) or isinstance(value, dict) or isinstance(value, list): diff --git a/posthog/cdp/test/test_site_functions.py b/posthog/cdp/test/test_site_functions.py index 658b16ba41be0..c9821435d848c 100644 --- a/posthog/cdp/test/test_site_functions.py +++ b/posthog/cdp/test/test_site_functions.py @@ -46,6 +46,12 @@ def compile_and_run(self): return result + def _execute_javascript(self, js) -> str: + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write(js.encode("utf-8")) + f.flush() + return subprocess.check_output(["node", f.name]).decode("utf-8") + def test_get_transpiled_function_basic(self): result = self.compile_and_run() assert isinstance(result, str) @@ -343,8 +349,51 @@ def test_run_function_onevent(self): ) assert "Loaded" == response.strip() - def _execute_javascript(self, js) -> str: - with tempfile.NamedTemporaryFile(delete=False) as f: - f.write(js.encode("utf-8")) - f.flush() - return subprocess.check_output(["node", f.name]).decode("utf-8") + def test_get_transpiled_function_with_ordered_inputs(self): + self.hog_function.hog = "export function onLoad() { console.log(inputs); }" + self.hog_function.inputs = { + "first": {"value": "I am first", "order": 0}, + "second": {"value": "{person.properties.name}", "order": 1}, + "third": {"value": "{event.properties.url}", "order": 2}, + } + + result = self.compile_and_run() + + assert '"first": "I am first"' in result + idx_first = result.index('"first": "I am first"') + idx_second = result.index('inputs["second"] = getInputsKey("second");') + idx_third = result.index('inputs["third"] = getInputsKey("third");') + + assert idx_first < idx_second < idx_third + + def test_get_transpiled_function_without_order(self): + self.hog_function.hog = "export function onLoad() { console.log(inputs); }" + self.hog_function.inputs = { + "noOrder": {"value": "I have no order"}, + "alsoNoOrder": {"value": "{person.properties.name}"}, + "withOrder": {"value": "{event.properties.url}", "order": 10}, + } + + result = self.compile_and_run() + + idx_noOrder = result.index('"noOrder": "I have no order"') + idx_alsoNoOrder = result.index('inputs["alsoNoOrder"] = getInputsKey("alsoNoOrder");') + idx_withOrder = result.index('inputs["withOrder"] = getInputsKey("withOrder");') + + assert idx_noOrder < idx_alsoNoOrder < idx_withOrder + + def test_get_transpiled_function_with_duplicate_orders(self): + self.hog_function.hog = "export function onLoad() { console.log(inputs); }" + self.hog_function.inputs = { + "alpha": {"value": "{person.properties.alpha}", "order": 1}, + "beta": {"value": "{person.properties.beta}", "order": 1}, + "gamma": {"value": "Just gamma", "order": 1}, + } + + result = self.compile_and_run() + + idx_alpha = result.index('inputs["alpha"] = getInputsKey("alpha");') + idx_beta = result.index('inputs["beta"] = getInputsKey("beta");') + idx_gamma = result.index('"gamma": "Just gamma"') + + assert idx_alpha is not None and idx_beta is not None and idx_gamma is not None diff --git a/posthog/cdp/test/test_validation.py b/posthog/cdp/test/test_validation.py index 90a41f8cca653..15f6dbb879cbd 100644 --- a/posthog/cdp/test/test_validation.py +++ b/posthog/cdp/test/test_validation.py @@ -85,6 +85,7 @@ def test_validate_inputs(self): 32, "http://localhost:2080/0e02d917-563f-4050-9725-aad881b69937", ], + "order": 0, # Now that we have ordering, url should have some order assigned }, "payload": { "value": { @@ -115,8 +116,12 @@ def test_validate_inputs(self): 2, ], }, + "order": 1, + }, + "method": { + "value": "POST", + "order": 2, }, - "method": {"value": "POST"}, "headers": { "value": {"version": "v={event.properties.$lib_version}"}, "bytecode": { @@ -138,6 +143,7 @@ def test_validate_inputs(self): 2, ] }, + "order": 3, }, } ) @@ -180,6 +186,109 @@ def test_validate_inputs_creates_bytecode_for_html(self): 3, ], "value": '\n\n\n\n\n\n

Hi {person.properties.email}

\n\n', + "order": 0, }, } ) + + # New tests for ordering + def test_validate_inputs_with_dependencies_simple_chain(self): + # Schema: A->B->C + # A has no deps, B uses A, C uses B + inputs_schema = [ + {"key": "A", "type": "string", "required": True}, + {"key": "C", "type": "string", "required": True}, + {"key": "B", "type": "string", "required": True}, + ] + # Values: B depends on A, C depends on B + # We'll use templates referencing inputs.A, inputs.B + inputs = { + "A": {"value": "A value"}, + "C": {"value": "{inputs.B} + C value"}, + "B": {"value": "{inputs.A} + B value"}, + } + + validated = validate_inputs(inputs_schema, inputs) + # Order should be A=0, B=1, C=2 + assert validated["A"]["order"] == 0 + assert validated["B"]["order"] == 1 + assert validated["C"]["order"] == 2 + + def test_validate_inputs_with_multiple_dependencies(self): + # Schema: W, X, Y, Z + # Z depends on W and Y + # Y depends on X + # X depends on W + # So order: W=0, X=1, Y=2, Z=3 + inputs_schema = [ + {"key": "X", "type": "string", "required": True}, + {"key": "W", "type": "string", "required": True}, + {"key": "Z", "type": "string", "required": True}, + {"key": "Y", "type": "string", "required": True}, + ] + inputs = { + "X": {"value": "{inputs.W}_x"}, + "W": {"value": "w"}, + "Z": {"value": "{inputs.W}{inputs.Y}_z"}, # depends on W and Y + "Y": {"value": "{inputs.X}_y"}, + } + + validated = validate_inputs(inputs_schema, inputs) + assert validated["W"]["order"] == 0 + assert validated["X"]["order"] == 1 + assert validated["Y"]["order"] == 2 + assert validated["Z"]["order"] == 3 + + def test_validate_inputs_with_no_dependencies(self): + # All inputs have no references. Any order is fine but all should start from 0 and increment. + inputs_schema = [ + {"key": "one", "type": "string", "required": True}, + {"key": "two", "type": "string", "required": True}, + {"key": "three", "type": "string", "required": True}, + ] + inputs = { + "one": {"value": "1"}, + "two": {"value": "2"}, + "three": {"value": "3"}, + } + + validated = validate_inputs(inputs_schema, inputs) + # Should just assign order in any stable manner (likely alphabetical since no deps): + # Typically: one=0, two=1, three=2 + # The actual order might depend on dictionary ordering, but given code, it should be alphabetical keys since we topologically sort by dependencies. + assert validated["one"]["order"] == 0 + assert validated["two"]["order"] == 1 + assert validated["three"]["order"] == 2 + + def test_validate_inputs_with_circular_dependencies(self): + # A depends on B, B depends on A -> should fail + inputs_schema = [ + {"key": "A", "type": "string", "required": True}, + {"key": "B", "type": "string", "required": True}, + ] + + inputs = { + "A": {"value": "{inputs.B} + A"}, + "B": {"value": "{inputs.A} + B"}, + } + + try: + validate_inputs(inputs_schema, inputs) + raise AssertionError("Expected circular dependency error") + except Exception as e: + assert "Circular dependency" in str(e) + + def test_validate_inputs_with_extraneous_dependencies(self): + # A depends on a non-existing input X + # This should ignore X since it's not defined. + # So no error, but A has no real dependencies that matter. + inputs_schema = [ + {"key": "A", "type": "string", "required": True}, + ] + inputs = { + "A": {"value": "{inputs.X} + A"}, + } + + validated = validate_inputs(inputs_schema, inputs) + # Only A is present, so A=0 + assert validated["A"]["order"] == 0 diff --git a/posthog/cdp/validation.py b/posthog/cdp/validation.py index ac7f19405cfd5..1d4ebf12e2f0b 100644 --- a/posthog/cdp/validation.py +++ b/posthog/cdp/validation.py @@ -6,22 +6,46 @@ from posthog.hogql.compiler.bytecode import create_bytecode from posthog.hogql.compiler.javascript import JavaScriptCompiler from posthog.hogql.parser import parse_program, parse_string_template +from posthog.hogql.visitor import TraversingVisitor from posthog.models.hog_functions.hog_function import TYPES_WITH_JAVASCRIPT_SOURCE +from posthog.hogql import ast logger = logging.getLogger(__name__) -def generate_template_bytecode(obj: Any) -> Any: +class InputCollector(TraversingVisitor): + inputs: set[str] + + def __init__(self): + super().__init__() + self.inputs = set() + + def visit_field(self, node: ast.Field): + super().visit_field(node) + if node.chain[0] == "inputs": + if len(node.chain) > 1: + self.inputs.add(str(node.chain[1])) + + +def collect_inputs(node: ast.Expr) -> set[str]: + input_collector = InputCollector() + input_collector.visit(node) + return input_collector.inputs + + +def generate_template_bytecode(obj: Any, input_collector: set[str]) -> Any: """ Clones an object, compiling any string values to bytecode templates """ if isinstance(obj, dict): - return {key: generate_template_bytecode(value) for key, value in obj.items()} + return {key: generate_template_bytecode(value, input_collector) for key, value in obj.items()} elif isinstance(obj, list): - return [generate_template_bytecode(item) for item in obj] + return [generate_template_bytecode(item, input_collector) for item in obj] elif isinstance(obj, str): - return create_bytecode(parse_string_template(obj)).bytecode + node = parse_string_template(obj) + input_collector.update(collect_inputs(node)) + return create_bytecode(node).bytecode else: return obj @@ -81,6 +105,9 @@ def to_representation(self, value): class InputsItemSerializer(serializers.Serializer): value = AnyInputField(required=False) bytecode = serializers.ListField(required=False, read_only=True) + # input_deps = serializers.ListField(required=False) + order = serializers.IntegerField(required=False) + transpiled = serializers.JSONField(required=False) def validate(self, attrs): schema = self.context["schema"] @@ -112,9 +139,9 @@ def validate(self, attrs): elif item_type == "email": if not isinstance(value, dict): raise serializers.ValidationError({"inputs": {name: f"Value must be an Integration ID."}}) - for key in ["from", "to", "subject"]: - if not value.get(key): - raise serializers.ValidationError({"inputs": {name: f"Missing value for '{key}'."}}) + for key_ in ["from", "to", "subject"]: + if not value.get(key_): + raise serializers.ValidationError({"inputs": {name: f"Missing value for '{key_}'."}}) if not value.get("text") and not value.get("html"): raise serializers.ValidationError({"inputs": {name: f"Either 'text' or 'html' is required."}}) @@ -133,7 +160,9 @@ def validate(self, attrs): if "bytecode" in attrs: del attrs["bytecode"] else: - attrs["bytecode"] = generate_template_bytecode(value) + input_collector: set[str] = set() + attrs["bytecode"] = generate_template_bytecode(value, input_collector) + attrs["input_deps"] = list(input_collector) if "transpiled" in attrs: del attrs["transpiled"] except Exception as e: @@ -154,6 +183,41 @@ def validate_inputs_schema(value: list) -> list: return serializer.validated_data or [] +def topological_sort(nodes: list[str], edges: dict[str, list[str]]) -> list[str]: + """ + Perform a topological sort on the given graph. + nodes: list of all node identifiers + edges: adjacency list where edges[node] = list of nodes that `node` depends on + Returns: A list of nodes in topologically sorted order (no cycles). + Raises an error if a cycle is detected. + """ + # Build in-degree + in_degree = {node: 0 for node in nodes} + for node, deps in edges.items(): + for dep in deps: + if dep in in_degree: + in_degree[node] = in_degree[node] + 1 + + # Find all nodes with in_degree 0 + queue = [n for n, d in in_degree.items() if d == 0] + sorted_list = [] + + while queue: + current = queue.pop(0) + sorted_list.append(current) + # Decrease in-degree of dependent nodes + for node, deps in edges.items(): + if current in deps: + in_degree[node] -= 1 + if in_degree[node] == 0: + queue.append(node) + + if len(sorted_list) != len(nodes): + raise serializers.ValidationError("Circular dependency detected in input_deps.") + + return sorted_list + + def validate_inputs( inputs_schema: list, inputs: dict, @@ -162,10 +226,13 @@ def validate_inputs( ) -> dict: """ Tricky: We want to allow overriding the secret inputs, but not return them. - If we have a given input then we use it, otherwise we pull it from the existing secrets + If we have a given input then we use it, otherwise we pull it from the existing secrets. + Then we do topological sorting based on input_deps to assign order. """ + validated_inputs = {} + # Validate each input against the schema for schema in inputs_schema: value = inputs.get(schema["key"]) or {} @@ -180,9 +247,35 @@ def validate_inputs( if not serializer.is_valid(): raise serializers.ValidationError(serializer.errors) - validated_inputs[schema["key"]] = serializer.validated_data + validated_data = serializer.validated_data + + # If it's a secret input, not required, and no value was provided, don't add it + if schema.get("secret", False) and not schema.get("required", False) and "value" not in validated_data: + # Skip adding this input entirely + continue + + validated_inputs[schema["key"]] = validated_data + + # We'll topologically sort keys based on their input_deps. + edges = {} + all_keys = list(validated_inputs.keys()) + for k, v in validated_inputs.items(): + deps = v.get("input_deps", []) + deps = [d for d in deps if d in validated_inputs] + edges[k] = deps + + sorted_keys = topological_sort(all_keys, edges) + + # Assign order according to topological sort + for i, key in enumerate(sorted_keys): + validated_inputs[key]["order"] = i + if "input_deps" in validated_inputs[key]: + del validated_inputs[key]["input_deps"] + + # Rebuild in sorted order + sorted_validated_inputs = {key: validated_inputs[key] for key in sorted_keys} - return validated_inputs + return sorted_validated_inputs def compile_hog(hog: str, hog_type: str, in_repl: Optional[bool] = False) -> list[Any]: