Skip to content

Commit

Permalink
feat(hog): autocomplete (#23332)
Browse files Browse the repository at this point in the history
  • Loading branch information
mariusandra authored Jun 28, 2024
1 parent 7a4acb9 commit 0db8b48
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -227,26 +227,97 @@
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.1
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
/* user_id:0 request:_snapshot_ */
SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value,
count(*) as count
FROM events e
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam')
GROUP BY value
ORDER BY count DESC, value DESC
LIMIT 26
OFFSET 0
'''
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.2
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
/* user_id:0 request:_snapshot_ */
SELECT countIf(steps = 1) step_1,
countIf(steps = 2) step_2,
avg(step_1_average_conversion_time_inner) step_1_average_conversion_time,
median(step_1_median_conversion_time_inner) step_1_median_conversion_time,
prop
FROM
(SELECT aggregation_target,
steps,
avg(step_1_conversion_time) step_1_average_conversion_time_inner,
median(step_1_conversion_time) step_1_median_conversion_time_inner ,
prop
FROM
(SELECT aggregation_target,
steps,
max(steps) over (PARTITION BY aggregation_target,
prop) as max_steps,
step_1_conversion_time ,
prop
FROM
(SELECT *,
if(latest_0 <= latest_1
AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps ,
if(isNotNull(latest_1)
AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
prop
FROM
(SELECT aggregation_target, timestamp, step_0,
latest_0,
step_1,
min(latest_1) over (PARTITION by aggregation_target,
prop
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 ,
if(has([['test'], ['control']], prop), prop, ['Other']) as prop
FROM
(SELECT *,
if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop
FROM
(SELECT e.timestamp as timestamp,
pdi.person_id as aggregation_target,
pdi.person_id as person_id,
if(event = '$pageview', 1, 0) as step_0,
if(step_0 = 1, timestamp, null) as latest_0,
if(event = '$pageleave', 1, 0) as step_1,
if(step_1 = 1, timestamp, null) as latest_1,
array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic,
prop_basic as prop,
argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam')
AND (step_0 = 1
OR step_1 = 1) )))
WHERE step_0 = 1 ))
GROUP BY aggregation_target,
steps,
prop
HAVING steps = max_steps)
GROUP BY prop
'''
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.3
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions frontend/src/lib/monaco/CodeEditor.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ function initEditor(
monaco.languages.register({ id: 'hog', extensions: ['.hog'], mimetypes: ['application/hog'] })
monaco.languages.setLanguageConfiguration('hog', hog.conf())
monaco.languages.setMonarchTokensProvider('hog', hog.language())
monaco.languages.registerCompletionItemProvider('hog', hogQLAutocompleteProvider(HogLanguage.hog))
monaco.languages.registerCodeActionProvider('hog', hogQLMetadataProvider())
}
}
Expand Down
103 changes: 85 additions & 18 deletions posthog/hogql/autocomplete.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
from posthog.hogql.filters import replace_filters
from posthog.hogql.functions.mapping import ALL_EXPOSED_FUNCTION_NAMES
from posthog.hogql.parser import parse_select, parse_expr, parse_string_template
from posthog.hogql.parser import parse_select, parse_expr, parse_string_template, parse_program
from posthog.hogql import ast
from posthog.hogql.base import AST, CTE, ConstantType
from posthog.hogql.resolver import resolve_types
Expand All @@ -40,6 +40,8 @@
from hogvm.python.stl import STL

ALL_HOG_FUNCTIONS = list(STL.keys())
MATCH_ANY_CHARACTER = "$$_POSTHOG_ANY_$$"
PROPERTY_DEFINITION_LIMIT = 220


class GetNodeAtPositionTraverser(TraversingVisitor):
Expand Down Expand Up @@ -293,8 +295,52 @@ def extend_responses(
)


MATCH_ANY_CHARACTER = "$$_POSTHOG_ANY_$$"
PROPERTY_DEFINITION_LIMIT = 220
class VariableFinder(TraversingVisitor):
    """Collect the variable names declared in the scopes that enclose a target node.

    The visitor walks a tree from its root and keeps one set of declared names
    per currently open scope (``ast.Block``, ``ast.Program`` or ``ast.Function``).
    When the walk reaches the target node, the names from every open scope are
    merged into ``node_vars``.
    """

    # Target node whose surrounding variables are wanted.
    node: AST | None = None
    # Nodes currently being visited, outermost first.
    stack: list[AST]
    # Scope-opening nodes currently being visited, outermost first.
    blocks: list[AST]
    # One set of declared names per entry in ``blocks``.
    vars: list[set[str]]
    # Result: names visible at the target node.
    node_vars: set[str]

    def __init__(self, node: ast.AST):
        """Remember the target ``node`` and start with empty bookkeeping."""
        super().__init__()
        self.node = node
        self.node_vars = set()
        self.vars = []
        self.blocks = []
        self.stack = []

    def visit(self, node: ast.AST | None):
        """Visit ``node``, tracking scopes, and snapshot the names once the target is hit."""
        if node is None:
            return

        # NOTE(review): this is an equality check, not an identity check -- confirm
        # that AST equality cannot match a different node than the intended one.
        if node == self.node:
            # Everything declared so far in each still-open scope is visible here.
            self.node_vars.update(*self.vars)
            return

        opens_scope = isinstance(node, (ast.Block, ast.Program, ast.Function))
        if opens_scope:
            self.blocks.append(node)
            self.vars.append(set())

        self.stack.append(node)
        super().visit(node)
        self.stack.pop()

        # Names declared inside a scope are discarded once that scope is left.
        if opens_scope:
            self.blocks.pop()
            self.vars.pop()

    def visit_variable_declaration(self, node: ast.VariableDeclaration):
        """Record the declared name in the innermost open scope, then keep walking."""
        if self.vars:
            self.vars[-1].add(node.name)
        super().visit_variable_declaration(node)


def gather_hog_variables_in_scope(root_node, node) -> list[str]:
    """Return the names of the Hog variables visible at ``node``.

    Walks the tree rooted at ``root_node`` with a ``VariableFinder`` that
    targets ``node`` and returns the names it collected.

    The names are gathered in a set, whose iteration order is not stable
    between processes, so they are returned sorted to keep the resulting
    suggestion order deterministic.
    """
    finder = VariableFinder(node)
    finder.visit(root_node)
    return sorted(finder.node_vars)


def get_hogql_autocomplete(
Expand Down Expand Up @@ -326,44 +372,38 @@ def get_hogql_autocomplete(
query_to_try = query.query[: query.endPosition] + extra_characters + query.query[query.endPosition :]
query_start = query.startPosition
query_end = query.endPosition + length_to_add
node_ast: ast.AST

if query.language == HogLanguage.HOG_QL:
with timings.measure("parse_select"):
select_ast = parse_select(query_to_try)
select_ast = parse_select(query_to_try, timings=timings)
root_node: ast.AST = select_ast
elif query.language == HogLanguage.HOG_QL_EXPR:
with timings.measure("parse_expr"):
node_ast = parse_expr(query_to_try)
node_ast = parse_expr(query_to_try, timings=timings)
select_ast = cast(ast.SelectQuery, clone_expr(source_query, clear_locations=True))
select_ast.select = [node_ast]
root_node = node_ast
elif query.language == HogLanguage.HOG_TEMPLATE:
with timings.measure("parse_template"):
node_ast = parse_string_template(query_to_try)
node_ast = parse_string_template(query_to_try, timings=timings)
select_ast = cast(ast.SelectQuery, clone_expr(source_query, clear_locations=True))
select_ast.select = [node_ast]
root_node = node_ast
elif query.language == HogLanguage.HOG:
with timings.measure("parse_program"):
node_ast = parse_program(query_to_try, timings=timings)
select_ast = cast(ast.SelectQuery, clone_expr(source_query, clear_locations=True))
root_node = node_ast
else:
raise ValueError(f"Unsupported autocomplete language: {query.language}")

if query.filters:
try:
select_ast = cast(ast.SelectQuery, replace_filters(select_ast, query.filters, team))
except Exception:
pass

if isinstance(select_ast, ast.SelectQuery):
ctes = select_ast.ctes
elif isinstance(select_ast, ast.SelectUnionQuery):
ctes = select_ast.select_queries[0].ctes

with timings.measure("find_node"):
# to account for the magic F' symbol we append to change antlr's mode
extra = 2 if query.language == HogLanguage.HOG_TEMPLATE else 0
find_node = GetNodeAtPositionTraverser(root_node, query_start + extra, query_end + extra)
node = find_node.node
parent_node = find_node.parent_node
nearest_select = find_node.nearest_select_query or select_ast

if isinstance(query.globals, dict) and isinstance(node, ast.Field):
for index, key in enumerate(node.chain):
Expand Down Expand Up @@ -397,6 +437,33 @@ def get_hogql_autocomplete(
details=values,
)

if query.language == HogLanguage.HOG:
hog_vars = gather_hog_variables_in_scope(root_node, node)
extend_responses(
keys=hog_vars,
suggestions=response.suggestions,
kind=Kind.VARIABLE,
)
extend_responses(
ALL_HOG_FUNCTIONS,
response.suggestions,
Kind.FUNCTION,
insert_text=lambda key: f"{key}()",
)
break

if query.filters:
try:
select_ast = cast(ast.SelectQuery, replace_filters(select_ast, query.filters, team))
except Exception:
pass

if isinstance(select_ast, ast.SelectQuery):
ctes = select_ast.ctes
elif isinstance(select_ast, ast.SelectUnionQuery):
ctes = select_ast.select_queries[0].ctes
nearest_select = find_node.nearest_select_query or select_ast

table_has_alias = (
nearest_select is not None
and isinstance(nearest_select, ast.SelectQuery)
Expand Down
28 changes: 27 additions & 1 deletion posthog/hogql/test/test_autocomplete.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from posthog.hogql.database.schema.events import EventsTable
from posthog.hogql.database.schema.persons import PERSONS_FIELDS
from posthog.models.property_definition import PropertyDefinition
from posthog.schema import HogQLAutocomplete, HogQLAutocompleteResponse, HogLanguage, HogQLQuery
from posthog.schema import HogQLAutocomplete, HogQLAutocompleteResponse, HogLanguage, HogQLQuery, Kind
from posthog.test.base import APIBaseTest, ClickhouseTestMixin


Expand Down Expand Up @@ -56,6 +56,19 @@ def _template(
)
return get_hogql_autocomplete(query=autocomplete, team=self.team, database_arg=database)

def _program(
    self, query: str, start: int, end: int, database: Optional[Database] = None
) -> HogQLAutocompleteResponse:
    """Run autocomplete on ``query`` as a Hog program.

    Builds a ``HogQLAutocomplete`` request in ``HogLanguage.HOG`` mode covering
    the ``start``..``end`` range and resolves it for the test team, optionally
    against the given ``database``.
    """
    source_query = HogQLQuery(query="select * from events")
    request = HogQLAutocomplete(
        kind="HogQLAutocomplete",
        language=HogLanguage.HOG,
        query=query,
        startPosition=start,
        endPosition=end,
        sourceQuery=source_query,
    )
    return get_hogql_autocomplete(query=request, team=self.team, database_arg=database)

def test_autocomplete(self):
query = "select * from events"
results = self._select(query=query, start=0, end=0)
Expand Down Expand Up @@ -310,3 +323,16 @@ def test_autocomplete_template_strings(self):
assert suggestion is not None
assert suggestion.label == "event"
assert suggestion.insertText == "event"

def test_autocomplete_hog(self):
    """A Hog program suggests both declared variables and at least one function."""
    hog_database = create_hogql_database(team_id=self.team.pk, team_arg=self.team)
    program = "let var1 := 3; let otherVar := 5; print(v)"

    # Offset 41 is the position right after the `v` inside `print(...)`.
    response = self._program(query=program, start=41, end=41, database=hog_database)

    variable_labels = [item.label for item in response.suggestions if item.kind == Kind.VARIABLE]
    assert len(variable_labels) == 2
    assert sorted(variable_labels) == ["otherVar", "var1"]

    function_suggestions = [item for item in response.suggestions if item.kind == Kind.FUNCTION]
    assert len(function_suggestions) > 0

0 comments on commit 0db8b48

Please sign in to comment.