From c14bed95b2a050f7534441c0e87350cf3434db30 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Wed, 28 Aug 2024 11:43:13 +0200 Subject: [PATCH] feat(hog): inline STL --- hogvm/__tests__/__snapshots__/inlineStl.hoge | 16 +++++++ .../__tests__/__snapshots__/inlineStl.stdout | 10 +++++ hogvm/__tests__/inlineStl.hog | 12 ++++++ .../customerio/template_customerio.py | 8 +--- posthog/hogql/bytecode.py | 42 ++++++++++++++++++- posthog/hogql/inline_stl.py | 35 ++++++++++++++++ 6 files changed, 114 insertions(+), 9 deletions(-) create mode 100644 hogvm/__tests__/__snapshots__/inlineStl.hoge create mode 100644 hogvm/__tests__/__snapshots__/inlineStl.stdout create mode 100644 hogvm/__tests__/inlineStl.hog create mode 100644 posthog/hogql/inline_stl.py diff --git a/hogvm/__tests__/__snapshots__/inlineStl.hoge b/hogvm/__tests__/__snapshots__/inlineStl.hoge new file mode 100644 index 0000000000000..67dcb67a3f751 --- /dev/null +++ b/hogvm/__tests__/__snapshots__/inlineStl.hoge @@ -0,0 +1,16 @@ +["_H", 1, 52, "arrayExists", 2, 0, 55, 36, 1, 36, 2, 2, "values", 1, 33, 1, 36, 3, 2, "length", 1, 31, 36, 5, 36, 4, 16, +40, 26, 36, 3, 36, 4, 45, 37, 6, 36, 6, 36, 0, 54, 1, 40, 2, 29, 38, 36, 4, 33, 1, 6, 37, 4, 39, -33, 35, 35, 35, 35, +35, 30, 38, 53, 0, 52, "arrayFilter", 2, 0, 66, 43, 0, 36, 1, 36, 3, 2, "values", 1, 33, 1, 36, 4, 2, "length", 1, 31, +36, 6, 36, 5, 16, 40, 33, 36, 4, 36, 5, 45, 37, 7, 36, 7, 36, 0, 54, 1, 40, 9, 36, 2, 36, 7, 2, "arrayPushBack", 2, 37, +2, 36, 5, 33, 1, 6, 37, 5, 39, -40, 35, 35, 35, 35, 35, 36, 2, 38, 35, 53, 0, 52, "arrayMap", 2, 0, 62, 43, 0, 36, 1, +36, 3, 2, "values", 1, 33, 1, 36, 4, 2, "length", 1, 31, 36, 6, 36, 5, 16, 40, 29, 36, 4, 36, 5, 45, 37, 7, 36, 2, 36, +7, 36, 0, 54, 1, 2, "arrayPushBack", 2, 37, 2, 36, 5, 33, 1, 6, 37, 5, 39, -36, 35, 35, 35, 35, 35, 36, 2, 38, 35, 53, +0, 32, "--- arrayMap ----", 2, "print", 1, 35, 52, "lambda", 1, 0, 6, 33, 2, 36, 0, 8, 38, 53, 0, 33, 1, 33, 2, 33, 3, +43, 3, 36, 2, 54, 2, 2, "print", 1, 35, 32, "--- arrayExists ----", 2, "print", 1, 35, 52, "lambda", 1, 0, 6, 32, +"%nana%", 36, 0, 17, 38, 53, 0, 32, "apple", 32, "banana", 32, "cherry", 43, 3, 36, 0, 54, 2, 2, "print", 1, 35, 52, +"lambda", 1, 0, 6, 32, "%boom%", 36, 0, 17, 38, 53, 0, 32, "apple", 32, "banana", 32, "cherry", 43, 3, 36, 0, 54, 2, 2, +"print", 1, 35, 52, "lambda", 1, 0, 6, 32, "%boom%", 36, 0, 17, 38, 53, 0, 43, 0, 36, 0, 54, 2, 2, "print", 1, 35, 32, +"--- arrayFilter ----", 2, "print", 1, 35, 52, "lambda", 1, 0, 6, 32, "%nana%", 36, 0, 17, 38, 53, 0, 32, "apple", 32, +"banana", 32, "cherry", 43, 3, 36, 1, 54, 2, 2, "print", 1, 35, 52, "lambda", 1, 0, 6, 32, "%e%", 36, 0, 17, 38, 53, 0, +32, "apple", 32, "banana", 32, "cherry", 43, 3, 36, 1, 54, 2, 2, "print", 1, 35, 52, "lambda", 1, 0, 6, 32, "%boom%", +36, 0, 17, 38, 53, 0, 43, 0, 36, 1, 54, 2, 2, "print", 1, 35, 35, 35, 35] diff --git a/hogvm/__tests__/__snapshots__/inlineStl.stdout b/hogvm/__tests__/__snapshots__/inlineStl.stdout new file mode 100644 index 0000000000000..75fb5213f4b95 --- /dev/null +++ b/hogvm/__tests__/__snapshots__/inlineStl.stdout @@ -0,0 +1,10 @@ +--- arrayMap ---- +[2, 4, 6] +--- arrayExists ---- +true +false +false +--- arrayFilter ---- +['banana'] +['apple', 'cherry'] +[] diff --git a/hogvm/__tests__/inlineStl.hog b/hogvm/__tests__/inlineStl.hog new file mode 100644 index 0000000000000..c70970182e361 --- /dev/null +++ b/hogvm/__tests__/inlineStl.hog @@ -0,0 +1,12 @@ +print('--- arrayMap ----') +print(arrayMap(x -> x * 2, [1,2,3])) + +print('--- arrayExists ----') +print(arrayExists(x -> x like '%nana%', ['apple', 'banana', 'cherry'])) +print(arrayExists(x -> x like '%boom%', ['apple', 'banana', 'cherry'])) +print(arrayExists(x -> x like '%boom%', [])) + +print('--- arrayFilter ----') +print(arrayFilter(x -> x like '%nana%', ['apple', 'banana', 'cherry'])) +print(arrayFilter(x -> x like '%e%', ['apple', 'banana', 'cherry'])) +print(arrayFilter(x -> x like '%boom%', [])) diff --git a/posthog/cdp/templates/customerio/template_customerio.py b/posthog/cdp/templates/customerio/template_customerio.py index a9131f1bd47e0..e4ea26cc29e67 100644 --- a/posthog/cdp/templates/customerio/template_customerio.py +++ b/posthog/cdp/templates/customerio/template_customerio.py @@ -14,13 +14,7 @@ let action := inputs.action let name := event.name -let hasIdentifier := false - -for (let key, value in inputs.identifiers) { - if (not empty(value)) { - hasIdentifier := true - } -} +let hasIdentifier := arrayExists(x -> not empty(x), values(inputs.identifiers)) if (not hasIdentifier) { print('No identifier set. Skipping as at least 1 identifier is needed.') diff --git a/posthog/hogql/bytecode.py b/posthog/hogql/bytecode.py index 268a9dedc15c9..f2f800a345b90 100644 --- a/posthog/hogql/bytecode.py +++ b/posthog/hogql/bytecode.py @@ -9,8 +9,9 @@ from posthog.hogql.base import AST from posthog.hogql.context import HogQLContext from posthog.hogql.errors import QueryError +from posthog.hogql.inline_stl import INLINE_STL from posthog.hogql.parser import parse_program -from posthog.hogql.visitor import Visitor +from posthog.hogql.visitor import Visitor, TraversingVisitor from hogvm.python.operation import ( Operation, HOGQL_BYTECODE_IDENTIFIER, @@ -70,6 +71,22 @@ def create_bytecode( bytecode.append(HOGQL_BYTECODE_IDENTIFIER) bytecode.append(HOGQL_BYTECODE_VERSION) + # Find all accessed inline STL functions and inline them at the start of the function + stl_functions: list[ast.Declaration] = [] + for field in sorted(find_fields(expr)): + if field in INLINE_STL and field not in supported_functions: + function_program = parse_program(INLINE_STL[field]) + stl_functions.extend(function_program.declarations) + if stl_functions: + if isinstance(expr, ast.Program): + expr = ast.Program(declarations=[*stl_functions, *expr.declarations]) + elif isinstance(expr, ast.ExprStatement): + expr = ast.Program(declarations=[*stl_functions, ast.ReturnStatement(expr=expr.expr)]) + elif isinstance(expr, ast.Statement): + expr = ast.Program(declarations=[*stl_functions, expr]) + else: + expr = ast.Program(declarations=[*stl_functions, ast.ReturnStatement(expr=expr)]) + bytecode.extend(BytecodeCompiler(supported_functions, args, context, enclosing).visit(expr)) return bytecode @@ -107,7 +124,6 @@ def __init__( self.supported_functions = supported_functions or set() self.locals: list[Local] = [] self.upvalues: list[UpValue] = [] - # self.functions: dict[str, HogFunction] = {} self.scope_depth = 0 self.args = args # we're in a function definition @@ -816,3 +832,25 @@ def execute_hog( context=HogQLContext(team_id=team.id if team else None), ) return execute_bytecode(bytecode, globals=globals, functions=functions, timeout=timeout, team=team) + + +class FieldFinder(TraversingVisitor): + fields: set[str] + + def __init__(self): + self.fields = set() + + def visit_field(self, node: ast.Field): + if len(node.chain) == 1: + self.fields.add(str(node.chain[0])) + + def visit_call(self, node: ast.Call): + self.fields.add(node.name) + for arg in node.args: + self.visit(arg) + + +def find_fields(node: ast.Expr | ast.Statement | ast.Program) -> set[str]: + finder = FieldFinder() + finder.visit(node) + return finder.fields diff --git a/posthog/hogql/inline_stl.py b/posthog/hogql/inline_stl.py new file mode 100644 index 0000000000000..7ea8df42f64a1 --- /dev/null +++ b/posthog/hogql/inline_stl.py @@ -0,0 +1,35 @@ +# TODO: this needs a better buildchain +# Functions defined here will be inlined into the HogQL bytecode + +INLINE_STL = { + "arrayExists": """ + fn arrayExists(func, arr) { + for (let i in arr) { + if (func(i)) { + return true + } + } + return false + } + """, + "arrayMap": """ + fn arrayMap(func, arr) { + let result := [] + for (let i in arr) { + result := arrayPushBack(result, func(i)) + } + return result + } + """, + "arrayFilter": """ + fn arrayFilter(func, arr) { + let result := [] + for (let i in arr) { + if (func(i)) { + result := arrayPushBack(result, i) + } + } + return result + } + """, +}