Skip to content

Commit

Permalink
fix: Make property group optimizations more restrictive about what ty…
Browse files Browse the repository at this point in the history
…pes can be used (#25382)
  • Loading branch information
tkaemming authored Oct 4, 2024
1 parent 1d27039 commit dc9752d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 11 deletions.
26 changes: 15 additions & 11 deletions posthog/hogql/printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,15 +665,16 @@ def __get_optimized_property_group_compare_operation(self, node: ast.CompareOper
elif constant_expr.value is False:
return f"equals({property_source.value_expr}, 'false')"

printed_expr = f"equals({property_source.value_expr}, {self.visit(constant_expr)})"
if constant_expr.value == "":
# If we're comparing to an empty string literal, we need to disambiguate this from the default value
# for the ``Map(String, String)`` type used for storing property group values by also ensuring that
# the property key is present in the map. If this is in a ``WHERE`` clause, this also ensures we can
# still use the data skipping index on keys, even though the values index cannot be used.
printed_expr = f"and({property_source.has_expr}, {printed_expr})"
if isinstance(constant_expr.type, ast.StringType):
printed_expr = f"equals({property_source.value_expr}, {self.visit(constant_expr)})"
if constant_expr.value == "":
# If we're comparing to an empty string literal, we need to disambiguate this from the default value
# for the ``Map(String, String)`` type used for storing property group values by also ensuring that
# the property key is present in the map. If this is in a ``WHERE`` clause, this also ensures we can
# still use the data skipping index on keys, even though the values index cannot be used.
printed_expr = f"and({property_source.has_expr}, {printed_expr})"

return printed_expr
return printed_expr

elif node.op == ast.CompareOperationOp.NotEq:
if constant_expr.value is None:
Expand Down Expand Up @@ -701,20 +702,23 @@ def __get_optimized_property_group_compare_operation(self, node: ast.CompareOper
elif node.right.value == "":
# If the RHS is the empty string, we need to disambiguate it from the default value for missing keys.
return f"and({property_source.has_expr}, equals({property_source.value_expr}, {self.visit(node.right)}))"
else:
elif isinstance(node.right.type, ast.StringType):
return f"in({property_source.value_expr}, {self.visit(node.right)})"
elif isinstance(node.right, ast.Tuple):
# If any of the values on the RHS are the empty string, we need to disambiguate it from the default
# value for missing keys. NULLs should also be dropped, but everything else can be passed through as-is.
# value for missing keys. NULLs should also be dropped, but everything else we can directly compare
# (strings) can be passed through as-is
default_value_expr: ast.Constant | None = None
for expr in node.right.exprs[:]:
if not isinstance(expr, ast.Constant):
return None # only optimize constants for now, see above
elif expr.value is None:
if expr.value is None:
node.right.exprs.remove(expr)
elif expr.value == "":
default_value_expr = expr
node.right.exprs.remove(expr)
elif not isinstance(expr.type, ast.StringType):
return None
if len(node.right.exprs) > 0:
# TODO: Check to see if it'd be faster to do equality comparison here instead?
printed_expr = f"in({property_source.value_expr}, {self.visit(node.right)})"
Expand Down
9 changes: 9 additions & 0 deletions posthog/hogql/test/test_printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,9 @@ def test_property_groups_optimized_basic_equality_comparisons(self) -> None:
expected_skip_indexes_used={"properties_group_custom_keys_bf", "properties_group_custom_values_bf"},
)

# Don't optimize comparisons to types that require non-trivial type conversions.
self._test_property_group_comparison("properties.key = 1", None)

# TODO: We'll want to eventually support this type of expression where the right hand side is a non-``Nullable``
# value, since this would allow expressions that only reference constant values to also use the appropriate
# index, but for right now we only want to optimize comparisons to constant values directly for simplicity.
Expand Down Expand Up @@ -620,6 +623,12 @@ def test_property_groups_optimized_in_comparisons(self) -> None:
expected_skip_indexes_used={"properties_group_custom_keys_bf"},
)

# Don't optimize comparisons to types that require additional type conversions.
self._test_property_group_comparison("properties.key in true", None)
self._test_property_group_comparison("properties.key in (true, false)", None)
self._test_property_group_comparison("properties.key in 1", None)
self._test_property_group_comparison("properties.key in (1, 2, 3)", None)

# Only direct constant comparison is supported for now -- see above.
self._test_property_group_comparison("properties.key in lower('value')", None)
self._test_property_group_comparison("properties.key in (lower('a'), lower('b'))", None)
Expand Down

0 comments on commit dc9752d

Please sign in to comment.