Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(hog): importing modules in hog #25796

Merged
merged 26 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ae546e8
feat(hog): importing modules in hog
mariusandra Oct 24, 2024
4b4a1db
Merge branch 'master' into messaging-part-2
mariusandra Oct 24, 2024
3e9cb9c
python multiple bytecodes
mariusandra Oct 24, 2024
828a360
python multiple bytecodes
mariusandra Oct 24, 2024
4f2f117
return root bytecode
mariusandra Oct 25, 2024
bd9fb2e
bump
mariusandra Oct 25, 2024
7012562
bump it all
mariusandra Oct 25, 2024
a5d8a61
Update UI snapshots for `chromium` (1)
github-actions[bot] Oct 25, 2024
aafb97b
tests
mariusandra Oct 25, 2024
9c115b9
Merge branch 'master' into messaging-part-2
mariusandra Oct 25, 2024
41bb0d0
mypy
mariusandra Oct 25, 2024
19e9e55
fix(snapshots): disable flakes
mariusandra Oct 25, 2024
e10393e
Merge branch 'fix-flakes-ffs' into messaging-part-2
mariusandra Oct 25, 2024
e7f30e9
remove comment
mariusandra Oct 25, 2024
1b7d156
Update UI snapshots for `chromium` (2)
github-actions[bot] Oct 25, 2024
164400c
Update UI snapshots for `chromium` (1)
github-actions[bot] Oct 25, 2024
648ee6b
Update UI snapshots for `chromium` (2)
github-actions[bot] Oct 25, 2024
d17ed70
Merge branch 'master' into messaging-part-2
mariusandra Oct 25, 2024
ad3f5d2
Merge branch 'messaging-part-2' of github.com:PostHog/posthog into me…
mariusandra Oct 25, 2024
865900e
Update UI snapshots for `chromium` (1)
github-actions[bot] Oct 25, 2024
f70789e
Update UI snapshots for `chromium` (2)
github-actions[bot] Oct 25, 2024
9cf1d61
Update UI snapshots for `chromium` (1)
github-actions[bot] Oct 25, 2024
60f8ca7
Update UI snapshots for `chromium` (2)
github-actions[bot] Oct 25, 2024
f494ac3
Update query snapshots
github-actions[bot] Oct 25, 2024
619228c
Update query snapshots
github-actions[bot] Oct 25, 2024
2ca140b
Update UI snapshots for `chromium` (1)
github-actions[bot] Oct 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
110 changes: 82 additions & 28 deletions hogvm/python/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,36 @@

MAX_MEMORY = 64 * 1024 * 1024 # 64 MB
MAX_FUNCTION_ARGS_LENGTH = 300
CALLSTACK_LENGTH = 1000


@dataclass
class BytecodeResult:
result: Any
bytecode: list[Any]
bytecodes: dict[str, list[Any]]
stdout: list[str]


def execute_bytecode(
bytecode: list[Any],
input: list[Any] | dict,
globals: Optional[dict[str, Any]] = None,
functions: Optional[dict[str, Callable[..., Any]]] = None,
timeout=timedelta(seconds=5),
team: Optional["Team"] = None,
debug=False,
) -> BytecodeResult:
if len(bytecode) == 0 or (bytecode[0] != HOGQL_BYTECODE_IDENTIFIER and bytecode[0] != HOGQL_BYTECODE_IDENTIFIER_V0):
bytecodes = input if isinstance(input, dict) else {"root": {"bytecode": input}}
root_bytecode = bytecodes.get("root", {}).get("bytecode", []) or []

if (
not root_bytecode
or len(root_bytecode) == 0
or (root_bytecode[0] != HOGQL_BYTECODE_IDENTIFIER and root_bytecode[0] != HOGQL_BYTECODE_IDENTIFIER_V0)
):
raise HogVMException(f"Invalid bytecode. Must start with '{HOGQL_BYTECODE_IDENTIFIER}'")
version = bytecode[1] if len(bytecode) >= 2 and bytecode[0] == HOGQL_BYTECODE_IDENTIFIER else 0
result = None
version = root_bytecode[1] if len(root_bytecode) >= 2 and root_bytecode[0] == HOGQL_BYTECODE_IDENTIFIER else 0
start_time = time.time()
last_op = len(bytecode) - 1
last_op = len(root_bytecode) - 1
stack: list = []
upvalues: list[dict] = []
upvalues_by_id: dict[int, dict] = {}
Expand All @@ -61,42 +68,54 @@ def execute_bytecode(
max_mem_used = 0
ops = 0
stdout: list[str] = []
colored_bytecode = color_bytecode(bytecode) if debug else []
debug_bytecode = []
if isinstance(timeout, int):
timeout = timedelta(seconds=timeout)

if len(call_stack) == 0:
call_stack.append(
CallFrame(
ip=2 if bytecode[0] == HOGQL_BYTECODE_IDENTIFIER else 1,
ip=0,
chunk="root",
stack_start=0,
arg_len=0,
closure=new_hog_closure(
new_hog_callable(
type="main",
type="local",
arg_count=0,
upvalue_count=0,
ip=2 if bytecode[0] == HOGQL_BYTECODE_IDENTIFIER else 1,
ip=0,
chunk="root",
name="",
)
),
)
)
frame = call_stack[-1]
chunk_bytecode: list[Any] = bytecode
chunk_bytecode: list[Any] = root_bytecode
chunk_globals = globals

def set_chunk_bytecode():
nonlocal chunk_bytecode, last_op
nonlocal chunk_bytecode, chunk_globals, last_op, debug_bytecode
if not frame.chunk or frame.chunk == "root":
chunk_bytecode = bytecode
last_op = len(bytecode) - 1
chunk_bytecode = root_bytecode
chunk_globals = globals
elif frame.chunk.startswith("stl/") and frame.chunk[4:] in BYTECODE_STL:
chunk_bytecode = BYTECODE_STL[frame.chunk[4:]][1]
last_op = len(bytecode) - 1
chunk_globals = {}
elif bytecodes.get(frame.chunk):
chunk_bytecode = bytecodes[frame.chunk].get("bytecode", [])
chunk_globals = bytecodes[frame.chunk].get("globals", {})
else:
raise HogVMException(f"Unknown chunk: {frame.chunk}")
last_op = len(chunk_bytecode) - 1
if debug:
debug_bytecode = color_bytecode(chunk_bytecode)
if frame.ip == 0 and (chunk_bytecode[0] == "_H" or chunk_bytecode[0] == "_h"):
# TODO: store chunk version
frame.ip += 2 if chunk_bytecode[0] == "_H" else 1

set_chunk_bytecode()

def stack_keep_first_elements(count: int) -> list[Any]:
nonlocal stack, mem_stack, mem_used
Expand Down Expand Up @@ -163,13 +182,28 @@ def capture_upvalue(index) -> dict:
return created_upvalue

symbol: Any = None
while frame.ip <= last_op:
while True:
# If we ran out of bytecode in this call frame: finish execution when it was the last frame, otherwise jump back to the previous call frame and push null as this frame's return value
if frame.ip > last_op:
last_call_frame = call_stack.pop()
if len(call_stack) == 0 or last_call_frame is None:
if len(stack) > 1:
raise HogVMException("Invalid bytecode. More than one value left on stack")
return BytecodeResult(
result=pop_stack() if len(stack) > 0 else None, stdout=stdout, bytecodes=bytecodes
)
stack_start = last_call_frame.stack_start
stack_keep_first_elements(stack_start)
push_stack(None)
frame = call_stack[-1]
set_chunk_bytecode()

ops += 1
symbol = chunk_bytecode[frame.ip]
if (ops & 127) == 0: # every 128th operation
check_timeout()
elif debug:
debugger(symbol, bytecode, colored_bytecode, frame.ip, stack, call_stack, throw_stack)
debugger(symbol, chunk_bytecode, debug_bytecode, frame.ip, stack, call_stack, throw_stack)
match symbol:
case None:
break
Expand Down Expand Up @@ -247,8 +281,8 @@ def capture_upvalue(index) -> dict:
push_stack(not bool(re.search(re.compile(args[1], re.RegexFlag.IGNORECASE), args[0])))
case Operation.GET_GLOBAL:
chain = [pop_stack() for _ in range(next_token())]
if globals and chain[0] in globals:
push_stack(deepcopy(get_nested_value(globals, chain, True)))
if chunk_globals and chain[0] in chunk_globals:
push_stack(deepcopy(get_nested_value(chunk_globals, chain, True)))
elif functions and chain[0] in functions:
push_stack(
new_hog_closure(
Expand Down Expand Up @@ -298,7 +332,7 @@ def capture_upvalue(index) -> dict:
response = pop_stack()
last_call_frame = call_stack.pop()
if len(call_stack) == 0 or last_call_frame is None:
return BytecodeResult(result=response, stdout=stdout, bytecode=bytecode)
return BytecodeResult(result=response, stdout=stdout, bytecodes=bytecodes)
stack_start = last_call_frame.stack_start
stack_keep_first_elements(stack_start)
push_stack(response)
Expand Down Expand Up @@ -459,10 +493,35 @@ def capture_upvalue(index) -> dict:
)
),
)
set_chunk_bytecode()
call_stack.append(frame)
continue # resume the loop without incrementing frame.ip
else:
if functions is not None and name in functions:
if name == "import":
if arg_count != 1:
raise HogVMException("Function import requires exactly 1 argument")
module_name = pop_stack()
frame.ip += 1 # advance for when we return
frame = CallFrame(
ip=0,
chunk=module_name,
stack_start=len(stack),
arg_len=0,
closure=new_hog_closure(
new_hog_callable(
type="local",
name=module_name,
arg_count=0,
upvalue_count=0,
ip=0,
chunk=module_name,
)
),
)
set_chunk_bytecode()
call_stack.append(frame)
continue
elif functions is not None and name in functions:
if version == 0:
args = [pop_stack() for _ in range(arg_count)]
else:
Expand Down Expand Up @@ -598,10 +657,5 @@ def capture_upvalue(index) -> dict:
)

frame.ip += 1
if debug:
debugger(symbol, bytecode, colored_bytecode, frame.ip, stack, call_stack, throw_stack)
if len(stack) > 1:
raise HogVMException("Invalid bytecode. More than one value left on stack")
if len(stack) == 1:
result = pop_stack()
return BytecodeResult(result=result, stdout=stdout, bytecode=bytecode)

return BytecodeResult(result=pop_stack() if len(stack) > 0 else None, stdout=stdout, bytecodes=bytecodes)
25 changes: 25 additions & 0 deletions hogvm/python/test/test_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -1008,3 +1008,28 @@ def test_bytecode_uncaught_errors(self):
assert e.payload == {"key": "value"}
else:
raise AssertionError("Expected Exception not raised")

def test_multiple_bytecodes(self):
    """The root chunk imports a second chunk and returns that chunk's result."""

    def returning(value):
        # Chunk that pushes a string and returns it.
        return {"bytecode": ["_H", 1, op.STRING, value, op.RETURN]}

    def importing(target):
        # Chunk that calls the global `import` with the target chunk name
        # as its single argument and returns whatever comes back.
        return {"bytecode": ["_H", 1, op.STRING, target, op.CALL_GLOBAL, "import", 1, op.RETURN]}

    chunks = {
        "root": importing("code2"),
        "code2": returning("banana"),
    }
    outcome = execute_bytecode(chunks)
    assert outcome.result == "banana"

def test_multiple_bytecodes_callback(self):
    """A chain of nested imports hands the innermost chunk's value back to root."""

    def returning(value):
        # Chunk that pushes a string and returns it.
        return {"bytecode": ["_H", 1, op.STRING, value, op.RETURN]}

    def importing(target):
        # Chunk that calls the global `import` with the target chunk name
        # as its single argument and returns whatever comes back.
        return {"bytecode": ["_H", 1, op.STRING, target, op.CALL_GLOBAL, "import", 1, op.RETURN]}

    # Each chunk imports the next one in the chain; the last one returns a value.
    chain = ["root", "code2", "code3", "code4", "code5"]
    chunks = {name: importing(following) for name, following in zip(chain, chain[1:])}
    chunks[chain[-1]] = returning("tomato")

    outcome = execute_bytecode(chunks)
    assert outcome.result == "tomato"
2 changes: 1 addition & 1 deletion hogvm/stl/src/arrayCount.hog
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
fn arrayCount(func, arr) {
fun arrayCount(func, arr) {
let count := 0
for (let i in arr) {
if (func(i)) {
Expand Down
2 changes: 1 addition & 1 deletion hogvm/stl/src/arrayExists.hog
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
fn arrayExists(func, arr) {
fun arrayExists(func, arr) {
for (let i in arr) {
if (func(i)) {
return true
Expand Down
2 changes: 1 addition & 1 deletion hogvm/stl/src/arrayFilter.hog
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
fn arrayFilter(func, arr) {
fun arrayFilter(func, arr) {
let result := []
for (let i in arr) {
if (func(i)) {
Expand Down
2 changes: 1 addition & 1 deletion hogvm/stl/src/arrayMap.hog
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
fn arrayMap(func, arr) {
fun arrayMap(func, arr) {
let result := []
for (let i in arr) {
result := arrayPushBack(result, func(i))
Expand Down
2 changes: 1 addition & 1 deletion hogvm/typescript/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@posthog/hogvm",
"version": "1.0.54",
"version": "1.0.55",
"description": "PostHog Hog Virtual Machine",
"types": "dist/index.d.ts",
"source": "src/index.ts",
Expand Down
Loading
Loading