Skip to content

Commit

Permalink
fix(hogql): equate equals and equals (#18655)
Browse files Browse the repository at this point in the history
  • Loading branch information
mariusandra authored Nov 16, 2023
1 parent 333df36 commit 64f1f91
Show file tree
Hide file tree
Showing 9 changed files with 187 additions and 32 deletions.
8 changes: 4 additions & 4 deletions ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@
AND event = '$pageview'
AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day
AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day
AND (and(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), '%example%'), 1))
AND (and(ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), '%example%'), 0), 1))
GROUP BY pdi.person_id)
GROUP BY start_of_period,
status)
Expand Down Expand Up @@ -426,7 +426,7 @@
AND event = '$pageview'
AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day
AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day
AND (and(like(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), '%example%'), 1))
AND (and(ifNull(like(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), '%example%'), 0), 1))
GROUP BY pdi.person_id)
GROUP BY start_of_period,
status)
Expand Down Expand Up @@ -501,7 +501,7 @@
AND event = '$pageview'
AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day
AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day
AND (like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'email'), ''), 'null'), '^"|"$', ''), '%test.com'))
AND (ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'email'), ''), 'null'), '^"|"$', ''), '%test.com'), 0))
GROUP BY pdi.person_id)
GROUP BY start_of_period,
status)
Expand Down Expand Up @@ -576,7 +576,7 @@
AND event = '$pageview'
AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day
AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day
AND (like(nullIf(nullIf(mat_pp_email, ''), 'null'), '%test.com'))
AND (ifNull(like(nullIf(nullIf(mat_pp_email, ''), 'null'), '%test.com'), 0))
GROUP BY pdi.person_id)
GROUP BY start_of_period,
status)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1613,8 +1613,7 @@
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC')
AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', '')))
AND (ifNull(ilike(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'hogql'), ''), 'null'), '^"|"$', ''), 'true'), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'hogql'), ''), 'null'), '^"|"$', ''))
and isNull('true'))))
AND (ifNull(ilike(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'hogql'), ''), 'null'), '^"|"$', ''), 'true'), 0)))
GROUP BY value
ORDER BY count DESC, value DESC
LIMIT 25
Expand Down Expand Up @@ -1655,8 +1654,7 @@
WHERE e.team_id = 2
AND event = '$pageview'
AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', '')))
AND (ifNull(ilike(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'hogql'), ''), 'null'), '^"|"$', ''), 'true'), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'hogql'), ''), 'null'), '^"|"$', ''))
and isNull('true'))))
AND (ifNull(ilike(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'hogql'), ''), 'null'), '^"|"$', ''), 'true'), 0)))
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC')
AND replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '') in (['test', 'control'])
Expand Down
14 changes: 7 additions & 7 deletions posthog/api/test/__snapshots__/test_insight.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-08 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC')
AND ((and(ifNull(less(toInt64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'int_value'), ''), 'null'), '^"|"$', '')), 10), 0), 1))
AND (like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%')))
AND (ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%'), 0)))
AND (step_0 = 1
OR step_1 = 1) ))
WHERE step_0 = 1 ))
Expand Down Expand Up @@ -215,11 +215,11 @@
person.person_props as person_props ,
if(event = 'user signed up'
AND (and(ifNull(less(toInt64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'int_value'), ''), 'null'), '^"|"$', '')), 10), 0), 1)
AND like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%')), 1, 0) as step_0,
AND ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%'), 0)), 1, 0) as step_0,
if(step_0 = 1, timestamp, null) as latest_0,
if(event = 'user did things'
AND (and(ifNull(less(toInt64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'int_value'), ''), 'null'), '^"|"$', '')), 10), 0), 1)
AND like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%')), 1, 0) as step_1,
AND ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%'), 0)), 1, 0) as step_1,
if(step_1 = 1, timestamp, null) as latest_1
FROM events e
INNER JOIN
Expand Down Expand Up @@ -438,7 +438,7 @@
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-08 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC')
AND ((and(ifNull(greater(toInt64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'int_value'), ''), 'null'), '^"|"$', '')), 10), 0), 1))
AND (like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%')))
AND (ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%'), 0)))
GROUP BY date)
GROUP BY day_start
ORDER BY day_start)
Expand Down Expand Up @@ -506,7 +506,7 @@
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-08 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC')
AND ((and(ifNull(greater(toInt64OrNull(nullIf(nullIf(events.mat_int_value, ''), 'null')), 10), 0), 1))
AND (like(nullIf(nullIf(mat_pp_fish, ''), 'null'), '%fish%')))
AND (ifNull(like(nullIf(nullIf(mat_pp_fish, ''), 'null'), '%fish%'), 0)))
GROUP BY date)
GROUP BY day_start
ORDER BY day_start)
Expand Down Expand Up @@ -548,7 +548,7 @@
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-08 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC')
AND (and(ifNull(less(toInt64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'int_value'), ''), 'null'), '^"|"$', '')), 10), 0), 1)
AND like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%'))
AND ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%'), 0))
GROUP BY date)
GROUP BY day_start
ORDER BY day_start)
Expand Down Expand Up @@ -590,7 +590,7 @@
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-08 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC')
AND (and(ifNull(less(toInt64OrNull(nullIf(nullIf(events.mat_int_value, ''), 'null')), 10), 0), 1)
AND like(nullIf(nullIf(mat_pp_fish, ''), 'null'), '%fish%'))
AND ifNull(like(nullIf(nullIf(mat_pp_fish, ''), 'null'), '%fish%'), 0))
GROUP BY date)
GROUP BY day_start
ORDER BY day_start)
Expand Down
8 changes: 3 additions & 5 deletions posthog/api/test/__snapshots__/test_query.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@
'a%sd',
concat(ifNull(toString(events.event), ''), ' ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'key'), ''), 'null'), '^"|"$', '')), ''))
FROM events
WHERE and(equals(events.team_id, 2), ifNull(ilike(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'path'), ''), 'null'), '^"|"$', ''), '%/%'), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'path'), ''), 'null'), '^"|"$', ''))
and isNull('%/%')), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC')))
WHERE and(equals(events.team_id, 2), ifNull(ilike(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'path'), ''), 'null'), '^"|"$', ''), '%/%'), 0), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC')))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0 SETTINGS readonly=2,
Expand Down Expand Up @@ -93,8 +92,7 @@
'a%sd',
concat(ifNull(toString(events.event), ''), ' ', ifNull(toString(nullIf(nullIf(events.mat_key, ''), 'null')), ''))
FROM events
WHERE and(equals(events.team_id, 2), ifNull(ilike(nullIf(nullIf(events.mat_path, ''), 'null'), '%/%'), isNull(nullIf(nullIf(events.mat_path, ''), 'null'))
and isNull('%/%')), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC')))
WHERE and(equals(events.team_id, 2), ifNull(ilike(nullIf(nullIf(events.mat_path, ''), 'null'), '%/%'), 0), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC')))
ORDER BY events.event ASC
LIMIT 101
OFFSET 0 SETTINGS readonly=2,
Expand Down Expand Up @@ -533,7 +531,7 @@
SELECT count(),
events.event
FROM events
WHERE and(equals(events.team_id, 2), or(equals(events.event, 'sign up'), like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'key'), ''), 'null'), '^"|"$', ''), '%val2')), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC')))
WHERE and(equals(events.team_id, 2), or(equals(events.event, 'sign up'), ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'key'), ''), 'null'), '^"|"$', ''), '%val2'), 0)), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC')))
GROUP BY events.event
ORDER BY count() DESC, events.event ASC
LIMIT 101
Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class Database(BaseModel):
_timezone: Optional[str]
_week_start_day: Optional[WeekStartDay]

def __init__(self, timezone: Optional[str], week_start_day: Optional[WeekStartDay]):
def __init__(self, timezone: Optional[str] = None, week_start_day: Optional[WeekStartDay] = None):
super().__init__()
try:
self._timezone = str(ZoneInfo(timezone)) if timezone else None
Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql/database/schema/persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def select_from_persons_table(requested_fields: Dict[str, List[str]], modifiers:
SELECT id, max(version) as version
FROM raw_persons
GROUP BY id
HAVING ifNull(equals(argMax(raw_persons.is_deleted, raw_persons.version), 0), 0)
HAVING equals(argMax(raw_persons.is_deleted, raw_persons.version), 0)
)
"""
)
Expand Down
15 changes: 15 additions & 0 deletions posthog/hogql/functions/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,21 @@ class HogQLFunctionMeta:
"""Whether the function is timezone-aware. This means the project timezone will be appended as the last arg."""


HOGQL_COMPARISON_MAPPING: Dict[str, ast.CompareOperationOp] = {
"equals": ast.CompareOperationOp.Eq,
"notEquals": ast.CompareOperationOp.NotEq,
"less": ast.CompareOperationOp.Lt,
"greater": ast.CompareOperationOp.Gt,
"lessOrEquals": ast.CompareOperationOp.LtEq,
"greaterOrEquals": ast.CompareOperationOp.GtEq,
"like": ast.CompareOperationOp.Like,
"ilike": ast.CompareOperationOp.ILike,
"notLike": ast.CompareOperationOp.NotLike,
"notILike": ast.CompareOperationOp.NotILike,
"in": ast.CompareOperationOp.In,
"notIn": ast.CompareOperationOp.NotIn,
}

HOGQL_CLICKHOUSE_FUNCTIONS: Dict[str, HogQLFunctionMeta] = {
# arithmetic
"plus": HogQLFunctionMeta("plus", 2, 2),
Expand Down
28 changes: 24 additions & 4 deletions posthog/hogql/printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
escape_hogql_identifier,
escape_hogql_string,
)
from posthog.hogql.functions.mapping import ALL_EXPOSED_FUNCTION_NAMES, validate_function_args
from posthog.hogql.functions.mapping import ALL_EXPOSED_FUNCTION_NAMES, validate_function_args, HOGQL_COMPARISON_MAPPING
from posthog.hogql.resolver import ResolverException, resolve_types
from posthog.hogql.resolver_utils import lookup_field_by_name
from posthog.hogql.transforms.in_cohort import resolve_in_cohorts
Expand Down Expand Up @@ -556,7 +556,11 @@ def visit_compare_operation(self, node: ast.CompareOperation):
return op

# Special optimization for "Eq" operator
if node.op == ast.CompareOperationOp.Eq:
if (
node.op == ast.CompareOperationOp.Eq
or node.op == ast.CompareOperationOp.Like
or node.op == ast.CompareOperationOp.ILike
):
if isinstance(node.right, ast.Constant):
if node.right.value is None:
return f"isNull({left})"
Expand All @@ -568,7 +572,11 @@ def visit_compare_operation(self, node: ast.CompareOperation):
return f"ifNull({op}, isNull({left}) and isNull({right}))" # Worse case performance, but accurate

# Special optimization for "NotEq" operator
if node.op == ast.CompareOperationOp.NotEq:
if (
node.op == ast.CompareOperationOp.NotEq
or node.op == ast.CompareOperationOp.NotLike
or node.op == ast.CompareOperationOp.NotILike
):
if isinstance(node.right, ast.Constant):
if node.right.value is None:
return f"isNotNull({left})"
Expand Down Expand Up @@ -655,7 +663,19 @@ def visit_field(self, node: ast.Field):
raise HogQLException(f"Unknown Type, can not print {type(node.type).__name__}")

def visit_call(self, node: ast.Call):
if node.name in HOGQL_AGGREGATIONS:
if node.name in HOGQL_COMPARISON_MAPPING:
op = HOGQL_COMPARISON_MAPPING[node.name]
if len(node.args) != 2:
raise HogQLException(f"Comparison '{node.name}' requires exactly two arguments")
# We do "cleverer" logic with nullable types in visit_compare_operation
return self.visit_compare_operation(
ast.CompareOperation(
left=node.args[0],
right=node.args[1],
op=op,
)
)
elif node.name in HOGQL_AGGREGATIONS:
func_meta = HOGQL_AGGREGATIONS[node.name]

validate_function_args(
Expand Down
Loading

0 comments on commit 64f1f91

Please sign in to comment.