Skip to content

Commit

Permalink
add unnest patch
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 committed Oct 22, 2024
1 parent 2f75a32 commit c1b2866
Showing 1 changed file with 35 additions and 3 deletions.
38 changes: 35 additions & 3 deletions metadata-ingestion/src/datahub/sql_parsing/_sqlglot_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@
import sqlglot.expressions
import sqlglot.lineage
import sqlglot.optimizer.scope
import sqlglot.optimizer.unnest_subqueries

# This injects a few patches into sqlglot to add features and mitigate
# some bugs and performance issues.

assert sqlglot is not None

# The diffs in this file should match the diffs declared in our fork.
# https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main
# For a diff-formatted view, see:
Expand Down Expand Up @@ -72,6 +70,39 @@ def _patch_scope_traverse() -> None:
)


def _patch_unnest_subqueries() -> None:
# Patch sqlglot.optimizer.unnest_subqueries.decorrelate in place by applying
# the unified diff below via patchy.patch.
#
# The diff touches two spots: the `isinstance(predicate, exp.EQ)` branch and
# the trailing `else` branch. In both, the `parent_predicate = _replace(...)`
# assignment is wrapped in an `if parent_predicate:` guard, so the rewrite is
# skipped when parent_predicate is falsy.
# NOTE(review): presumably parent_predicate can be None at this point and the
# unguarded code failed on it -- confirm against the upstream function.
#
# The diff text is runtime data, not documentation; per the module header it
# should stay in sync with the diffs declared in the fork.
patchy.patch(
sqlglot.optimizer.unnest_subqueries.decorrelate,
"""\
@@ -261,16 +261,19 @@ def remove_aggs(node):
if key in group_by:
key.replace(nested)
elif isinstance(predicate, exp.EQ):
- parent_predicate = _replace(
- parent_predicate,
- f"({parent_predicate} AND ARRAY_CONTAINS({nested}, {column}))",
- )
+ if parent_predicate:
+ parent_predicate = _replace(
+ parent_predicate,
+ f"({parent_predicate} AND ARRAY_CONTAINS({nested}, {column}))",
+ )
else:
key.replace(exp.to_identifier("_x"))
- parent_predicate = _replace(
- parent_predicate,
- f"({parent_predicate} AND ARRAY_ANY({nested}, _x -> {predicate}))",
- )
+
+ if parent_predicate:
+ parent_predicate = _replace(
+ parent_predicate,
+ f"({parent_predicate} AND ARRAY_ANY({nested}, _x -> {predicate}))",
+ )
""",
)


def _patch_lineage() -> None:
# Add the "subfield" attribute to sqlglot.lineage.Node.
# With dataclasses, the easiest way to do this is with inheritance.
Expand Down Expand Up @@ -155,6 +186,7 @@ class Node(sqlglot.lineage.Node):

# Run the patch functions, in this order.
# NOTE(review): these look like module-level calls (executed on import), but
# indentation is not visible in this view -- confirm.
_patch_deepcopy()
_patch_scope_traverse()
_patch_unnest_subqueries()
_patch_lineage()

# Set to True only after all of the patch calls above have returned.
# NOTE(review): presumably read by other modules to check that the patches
# were applied -- verify against callers.
SQLGLOT_PATCHED = True

0 comments on commit c1b2866

Please sign in to comment.