Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
mariusandra committed Aug 28, 2024
1 parent 2eb5a95 commit 2326faf
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 46 deletions.
42 changes: 7 additions & 35 deletions hogvm/README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# HogVM

A HogVM is a 🦔 that runs HogQL bytecode. Its purpose is to locally evaluate HogQL expressions against any object.
A HogVM is a 🦔 that runs Hog bytecode. Its purpose is to locally evaluate Hog/QL expressions against any object.

## HogQL bytecode
## Hog bytecode

HogQL Bytecode is a compact representation of a subset of the HogQL AST nodes. It follows a certain structure:
Hog Bytecode is a compact representation of a subset of the Hog AST nodes. It follows a certain structure:

```
1 + 2 # [_H, op.INTEGER, 2, op.INTEGER, 1, op.PLUS]
Expand All @@ -23,11 +23,11 @@ The `python/execute.py` function in this folder acts as the reference implementa

### Operations

To be considered a PostHog HogQL Bytecode Certified Parser, you must implement the following operations:
Here's a sample list of Hog bytecode operations, missing about half of them and likely out of date:

```bash
FIELD = 1 # [arg3, arg2, arg1, FIELD, 3] # arg1.arg2.arg3
CALL = 2 # [arg2, arg1, CALL, 'concat', 2] # concat(arg1, arg2)
CALL_GLOBAL = 2 # [arg2, arg1, CALL_GLOBAL, 'concat', 2] # concat(arg1, arg2)
AND = 3 # [val3, val2, val1, AND, 3] # val1 and val2 and val3
OR = 4 # [val3, val2, val1, OR, 3] # val1 or val2 or val3
NOT = 5 # [val, NOT] # not val
Expand Down Expand Up @@ -60,29 +60,9 @@ INTEGER = 33 # [INTEGER, 123] # 123
FLOAT = 34 # [FLOAT, 123.12] # 123.12
```

### Async Operations

Some operations can't be computed directly, and are thus asked back to the caller. These include:

```bash
IN_COHORT = 27 # [val2, val1, IN_COHORT] # val1 in cohort val2
NOT_IN_COHORT = 28 # [val2, val1, NOT_IN_COHORT] # val1 not in cohort val2
```

The arguments for these instructions will be passed on to the provided `async_operation(*args)` in reverse:

```python
def async_operation(*args):
    if args[0] == op.IN_COHORT:
        return db.queryInCohort(args[1], args[2])
    return False

execute_bytecode(to_bytecode("'user_id' in cohort 2"), {}, async_operation).result
```

### Functions

A PostHog HogQL Bytecode Certified Parser must also implement the following function calls:
A Hog Certified Parser must also implement the following function calls:

```bash
concat(...) # concat('test: ', 1, null, '!') == 'test: 1!'
Expand All @@ -96,19 +76,11 @@ ifNull(val, alternative) # ifNull('string', false) == 'string'

### Null handling

In HogQL equality comparisons, `null` is treated as any other variable. Its presence will not make functions automatically return `null`, as is the ClickHouse default.
In Hog/QL equality comparisons, `null` is treated as any other variable. Its presence will not make functions automatically return `null`, as is the ClickHouse default.

```sql
1 == null # false
1 != null # true
```

Nulls are just ignored in `concat`


## Known broken features

- **Regular Expression** support is implemented, but NOT GUARANTEED to work the same way across platforms. Different implementations (ClickHouse, Python, Node) use different Regexp engines. ClickHouse uses `re2`, the others use `pcre`. Use the case-insensitive regex operators instead of passing in modifier flags through the expression.
- **DateTime** comparisons are not supported.
- **Cohort Matching** operations are not implemented.
- Only a small subset of functions is enabled. This list is bound to expand.
35 changes: 24 additions & 11 deletions hogvm/typescript/src/execute.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ export async function execAsync(bytecode: any[], options?: ExecOptions): Promise

export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
let vmState: VMState | undefined = undefined
let bytecode: any[] | undefined = undefined
let bytecode: any[]
if (!Array.isArray(code)) {
vmState = code
bytecode = vmState.bytecode
Expand Down Expand Up @@ -171,6 +171,16 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
} satisfies CallFrame)
}
let frame: CallFrame = callStack[callStack.length - 1]
let chunkBytecode: any[] = bytecode
// Re-points `chunkBytecode` at the instruction array that belongs to the current `frame`.
//  - no chunk, or the 'root' chunk -> the bytecode handed to exec()
//  - 'stl/<name>'                   -> element [1] of the matching BYTECODE_STL entry
//  - anything else                  -> HogVMException
const setChunkBytecode = (): void => {
    const chunkName = frame.chunk
    if (!chunkName || chunkName === 'root') {
        chunkBytecode = bytecode
        return
    }
    if (!chunkName.startsWith('stl/')) {
        throw new HogVMException(`Unknown chunk: ${frame.chunk}`)
    }
    // Drop the 'stl/' prefix to get the lookup key; when the lookup yields nothing,
    // an empty instruction list is used instead of raising.
    const stlEntry = BYTECODE_STL[chunkName.substring(4)]
    chunkBytecode = stlEntry?.[1] ?? []
}

function popStack(): any {
if (stack.length === 0) {
Expand Down Expand Up @@ -211,11 +221,10 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
}

function next(): any {
const bc = frame.chunk === 'root' || !frame.chunk ? bytecode : BYTECODE_STL[frame.chunk][1]
if (frame.ip >= bc!.length - 1) {
if (frame.ip >= chunkBytecode.length - 1) {
throw new HogVMException('Unexpected end of bytecode')
}
return bc![++frame.ip]
return chunkBytecode[++frame.ip]
}

function checkTimeout(): void {
Expand Down Expand Up @@ -260,15 +269,14 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
}

while (true) {
const bc = frame.chunk === 'root' || !frame.chunk ? bytecode : BYTECODE_STL[frame.chunk][1]
if (frame.ip >= bc.length) {
if (frame.ip >= chunkBytecode.length) {
break
}
ops += 1
if ((ops & 127) === 0) {
checkTimeout()
}
switch (bc[frame.ip]) {
switch (chunkBytecode[frame.ip]) {
case null:
break
case Operation.STRING:
Expand Down Expand Up @@ -435,7 +443,7 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
argCount: BYTECODE_STL[chain[0]][0].length,
upvalueCount: 0,
ip: 0,
chunk: chain[0],
chunk: `stl/${chain[0]}`,
})
)
)
Expand All @@ -460,6 +468,7 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
stackKeepFirstElements(stackStart)
pushStack(result)
frame = callStack[callStack.length - 1]
setChunkBytecode()
continue // resume the loop without incrementing frame.ip
}
case Operation.GET_LOCAL:
Expand Down Expand Up @@ -632,6 +641,7 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
})
),
} satisfies CallFrame
setChunkBytecode()
callStack.push(frame)
continue // resume the loop without incrementing frame.ip
} else {
Expand Down Expand Up @@ -707,7 +717,7 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
frame.ip += 1 // advance for when we return
frame = {
ip: 0,
chunk: name,
chunk: `stl/${name}`,
stackStart: stack.length - temp,
argCount: temp,
closure: newHogClosure(
Expand All @@ -716,10 +726,11 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
argCount: temp,
upvalueCount: 0,
ip: 0,
chunk: name,
chunk: `stl/${name}`,
})
),
} satisfies CallFrame
setChunkBytecode()
callStack.push(frame)
continue // resume the loop without incrementing frame.ip
} else {
Expand Down Expand Up @@ -762,6 +773,7 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
argCount: closure.callable.argCount,
closure,
} satisfies CallFrame
setChunkBytecode()
callStack.push(frame)
continue // resume the loop without incrementing frame.ip
} else if (closure.callable.__hogCallable__ === 'stl') {
Expand Down Expand Up @@ -850,6 +862,7 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
callStack.splice(callStackLen)
pushStack(exception)
frame = callStack[callStack.length - 1]
setChunkBytecode()
frame.ip = catchIp
continue // resume the loop without incrementing frame.ip
} else {
Expand All @@ -858,7 +871,7 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
}
default:
throw new HogVMException(
`Unexpected node while running bytecode in chunk "${frame.chunk}": ${bc[frame.ip]}`
`Unexpected node while running bytecode in chunk "${frame.chunk}": ${chunkBytecode[frame.ip]}`
)
}

Expand Down

0 comments on commit 2326faf

Please sign in to comment.