Skip to content

Commit

Permalink
release 11.655.24305
Browse files Browse the repository at this point in the history
  • Loading branch information
klahnakoski committed Oct 31, 2024
2 parents 0aa026f + d0bee36 commit 2e07bc7
Show file tree
Hide file tree
Showing 22 changed files with 377 additions and 215 deletions.
36 changes: 35 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ The objective is to convert SQL queries to JSON-izable parse trees. This origina

## Project Status

December 2023 - I continue to resolve issues as they are raised. There are [over 1100 tests](https://app.travis-ci.com/github/klahnakoski/mo-sql-parsing), that cover most SQL for most databases, with limited DML and UDF support, including:
October 2024 - I continue to resolve issues as they are raised. There are [over 1200 tests](https://app.travis-ci.com/github/klahnakoski/mo-sql-parsing) that cover most SQL for most databases, with limited DML and UDF support, including:

* inner queries,
* with clauses,
Expand Down Expand Up @@ -159,6 +159,40 @@ for select in listwrap(parsed_result.get('select')):

## Version Changes, Features

### Version 11

*October 2024*

The `PIVOT` and `UNPIVOT` clauses have been promoted to top-level. Instead of being part of the joins found in the `FROM` clause, they are now siblings to `SELECT`.

```
>>> from mo_sql_parsing import parse
>>> parse("SELECT * FROM table PIVOT (SUM(x) FOR y IN (1, 2, 3))")
```

now emits

```
{
'select': {'all_columns': {}},
'from': 'table',
'pivot': {'sum': 'x', 'for': 'y', 'in': [1, 2, 3]}
}
```

instead of

```
{
'select': {'all_columns': {}},
'from': [
'table',
{'pivot': {'sum': 'x', 'for': 'y', 'in': [1, 2, 3]}}
]
}
```



### Version 10

Expand Down
42 changes: 34 additions & 8 deletions mo_sql_parsing/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from mo_future import first, is_text, string_types, text
from mo_parsing import listwrap

from mo_sql_parsing.keywords import RESERVED, join_keywords, precedence
from mo_sql_parsing.keywords import RESERVED, join_keywords, precedence, pivot_keywords
from mo_sql_parsing.utils import binary_ops, is_set_op

MAX_PRECEDENCE = 100
Expand Down Expand Up @@ -110,6 +110,8 @@ def isolate(expr, sql, prec):
"select_distinct",
"select",
"from",
"pivot",
"unpivot",
"where",
"groupby",
"having",
Expand Down Expand Up @@ -511,6 +513,28 @@ def _select_distinct(self, json, prec):
def _distinct_on(self, json, prec):
    """Render a ``DISTINCT ON (...)`` fragment.

    ``json`` may be a single expression or a list of expressions; it is
    normalised with ``listwrap`` and every item is formatted through
    ``self.dispatch``.  ``prec`` is accepted but not used here.
    """
    columns = [self.dispatch(item) for item in listwrap(json)]
    joined = ", ".join(columns)
    return "DISTINCT ON (" + joined + ")"

def pivot(self, json, prec):
    """Format the ``pivot`` entry of ``json`` as a ``PIVOT (...)`` clause.

    The entry's ``aggregate`` expression is formatted with ``self.dispatch``
    and handed to ``self._pivot`` together with the entry itself.
    ``prec`` is accepted but not used here.
    """
    clause = json["pivot"]
    aggregate_sql = self.dispatch(clause["aggregate"])
    return self._pivot("PIVOT", clause, aggregate_sql)

def unpivot(self, json, prec):
    """Format the ``unpivot`` entry of ``json`` as an ``UNPIVOT (...)`` clause.

    When the entry carries a ``nulls`` key, a truthy value adds
    ``INCLUDE NULLS`` and a falsy value adds ``EXCLUDE NULLS``; when the key
    is absent no modifier is emitted.  ``prec`` is accepted but not used here.
    """
    clause = json["unpivot"]
    # Default: no NULLS modifier unless the parse tree asks for one.
    suffix = ""
    if "nulls" in clause:
        suffix = " INCLUDE NULLS" if clause["nulls"] else " EXCLUDE NULLS"
    value_sql = self.dispatch(clause["value"])
    return self._pivot(f"UNPIVOT{suffix}", clause, value_sql)

def _pivot(self, op, pivot, value):
    """Build the shared ``<op> (<value> FOR <for> IN <in>)`` text.

    :param op: leading keyword text supplied by the caller
    :param pivot: mapping holding ``for`` and ``in``, and optionally ``name``
    :param value: already-formatted SQL placed before ``FOR``
    :return: the clause, followed by ``AS <name>`` when ``name`` is present
    """
    column_sql = self.dispatch(pivot["for"])
    choices_sql = self.dispatch(pivot["in"])
    clause = f"{op} ({value} FOR {column_sql} IN {choices_sql})"
    if "name" not in pivot:
        return clause
    # The alias is inserted as-is; it is not passed through dispatch.
    alias = pivot["name"]
    return f"{clause} AS {alias}"

def _join_on(self, json, prec):
detected_join = join_keywords & set(json.keys())
if len(detected_join) == 0:
Expand Down Expand Up @@ -614,11 +638,14 @@ def select(self, json, prec):
if s == "*":
acc.append("*")
continue
all_col = s.get("all_columns")
if all_col or isinstance(all_col, dict):
acc.append(self.all_columns(s, precedence["select"]))
else:
if isinstance(s, str):
acc.append(self.dispatch(s, precedence["select"]))
else:
all_col = s.get("all_columns")
if all_col or isinstance(all_col, dict):
acc.append(self.all_columns(s, precedence["select"]))
else:
acc.append(self.dispatch(s, precedence["select"]))
param = ", ".join(acc)
if "top" in json:
top = self.dispatch(json["top"])
Expand Down Expand Up @@ -651,7 +678,7 @@ def select_distinct(self, json, prec):
return f"SELECT DISTINCT {param}"

def from_(self, json, prec):
is_join = False
joiner = ", "
from_ = json["from"]
if isinstance(from_, dict) and "literal" in from_:
content = ", ".join(self._literal(row) for row in from_["literal"])
Expand All @@ -664,11 +691,10 @@ def from_(self, json, prec):
parts = []
for v in from_:
if join_keywords & set(v):
is_join = True
joiner = " "
parts.append(self._join_on(v, precedence["from"] - 1))
else:
parts.append(self.dispatch(v, precedence["from"] - 1))
joiner = " " if is_join else ", "
rest = joiner.join(parts)
return f"FROM {rest}"

Expand Down
9 changes: 8 additions & 1 deletion mo_sql_parsing/keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@
"outer apply",
}

# Keys of the two pivot-style clauses: "pivot" and "unpivot".
pivot_keywords = {"pivot", "unpivot"}

precedence = {
# https://www.sqlite.org/lang_expr.html
"literal": -1,
Expand Down Expand Up @@ -348,7 +353,9 @@
"where": 33,
"groupby": 34,
"window": 35,
"having": 35,
"pivot": 36,
"unpivot": 36,
"having": 37,
"union": 40,
"union_all": 40,
"except": 40,
Expand Down
32 changes: 14 additions & 18 deletions mo_sql_parsing/sql_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,14 +237,14 @@ def matching(type):
+ RB
) / to_json_call

single_quote_name = Regex(r"\'(\'\'|[^'])*\'") / (lambda x: single_literal(x)['literal'])
single_quote_name = Regex(r"\'(\'\'|[^'])*\'") / (lambda x: single_literal(x)["literal"])

alias = Optional((
(
(
AS + ((ident|single_quote_name)("name") + Optional(LB + delimited_list(ident("col")) + RB))
AS + ((ident | single_quote_name)("name") + Optional(LB + delimited_list(ident("col")) + RB))
| (
(identifier|single_quote_name)("name")
(identifier | single_quote_name)("name")
+ Optional((LB + delimited_list(ident("col")) + RB) | (AS + delimited_list(identifier("col"))))
)
)
Expand Down Expand Up @@ -403,7 +403,7 @@ def scale(tokens):

table_source = Forward()

pivot_join = Group(assign(
pivot_join = assign(
"pivot",
(
LB
Expand All @@ -412,10 +412,10 @@ def scale(tokens):
+ RB
+ alias
),
))
)

# https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#unpivot_operator
unpivot_join = Group(assign(
unpivot_join = assign(
"unpivot",
(
Optional(keyword("EXCLUDE NULLS")("nulls") / False | keyword("INCLUDE NULLS")("nulls") / True)
Expand All @@ -435,18 +435,14 @@ def scale(tokens):
+ RB
+ alias
),
))
)

join = Forward() / to_join_call
join << (
pivot_join
| unpivot_join
| (
Group(joins)("op")
+ table_source("join")
+ Optional(Group(join)("child"))
+ Optional((ON + expression("on")) | (USING + expression("using")))
)
Group(joins)("op")
+ table_source("join")
+ Optional(Group(join)("child"))
+ Optional((ON + expression("on")) | (USING + expression("using")))
)

tops = (
Expand Down Expand Up @@ -498,6 +494,8 @@ def scale(tokens):
+ into
+ Optional((FROM + delimited_list(table_source) + ZeroOrMore(join))("from"))
+ Optional(WHERE + expression("where"))
+ Optional(pivot_join)
+ Optional(unpivot_join)
+ Optional(GROUP_BY + delimited_list(Group(named_column))("groupby"))
+ (
Optional(HAVING + expression("having"))
Expand Down Expand Up @@ -708,10 +706,8 @@ def mult(tokens):
+ Optional(AS.suppress() + infix_notation(query, [])("query"))
+ Optional(CLUSTER_BY.suppress() + LB + delimited_list(identifier) + RB)("cluster_by")
+ ZeroOrMore(
assign("sortkey", LB + delimited_list(identifier) + RB)
| assign("distkey", LB + identifier + RB)
assign("sortkey", LB + delimited_list(identifier) + RB) | assign("distkey", LB + identifier + RB)
)

)("create table")

definer = Optional(keyword("definer").suppress() + EQ + identifier("definer"))
Expand Down
4 changes: 2 additions & 2 deletions packaging/setup.py

Large diffs are not rendered by default.

38 changes: 36 additions & 2 deletions packaging/setuptools.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"",
"## Project Status",
"",
"December 2023 - I continue to resolve issues as they are raised. There are [over 1100 tests](https://app.travis-ci.com/github/klahnakoski/mo-sql-parsing), that cover most SQL for most databases, with limited DML and UDF support, including:",
"October 2024 - I continue to resolve issues as they are raised. There are [over 1200 tests](https://app.travis-ci.com/github/klahnakoski/mo-sql-parsing), that cover most SQL for most databases, with limited DML and UDF support, including:",
"",
" * inner queries, ",
" * with clauses, ",
Expand Down Expand Up @@ -188,6 +188,40 @@
"",
"## Version Changes, Features",
"",
"### Version 11",
"",
"*October 2024*",
"",
"The `PIVOT` clause has been promoted to top-level. Instead of being part of the joins found in the `FROM` clause, it is now a sibling to `SELECT`.",
"",
"```",
">>> from mo_sql_parsing import parse",
">>> parse(\"SELECT * FROM table PIVOT (SUM(x) FOR y IN (1, 2, 3))\")",
"```",
"",
"now emits",
"",
"```",
"{",
" 'select': {'all_columns': {}}, ",
" 'from': 'table', ",
" 'pivot': {'sum': 'x', 'for': 'y', 'in': [1, 2, 3]}",
"}",
"```",
"",
"instead of",
"",
"```",
"{",
" 'select': {'all_columns': {}}, ",
" 'from': [",
" 'table', ",
" 'pivot': {'sum': 'x', 'for': 'y', 'in': [1, 2, 3]}",
" ]",
"}",
"```",
"",
"",
"",
"### Version 10",
"",
Expand Down Expand Up @@ -312,6 +346,6 @@
"name": "mo-sql-parsing",
"packages": ["mo_sql_parsing"],
"url": "https://github.com/klahnakoski/mo-sql-parsing",
"version": "10.657.24299",
"version": "11.655.24305",
"zip_safe": true
}
2 changes: 1 addition & 1 deletion tests/mysql/products.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,5 +116,5 @@
]},
"insert": "product",
},
{"commit":{}},
{"commit": {}},
]
4 changes: 2 additions & 2 deletions tests/smoke_test1.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
start_import = time()
from mo_sql_parsing import parse, normal_op
end_time = time()
import_time = end_time-start_import
import_time = end_time - start_import
if import_time > 0.2:
raise Exception(f"importing mo_sql_parsing took too long ({import_time} seconds)")

Expand All @@ -21,7 +21,7 @@
start_run = time()
result = parse(sql, calls=normal_op)
end_run = time()
run_time = end_run-start_run
run_time = end_run - start_run

print(result)
print(f"done in {run_time} seconds")
3 changes: 2 additions & 1 deletion tests/smoke_test2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
# ensure first import is fast
start_import = time()
from mo_sql_parsing import format

end_time = time()
print(format({"from":"a"}))
print(format({"from": "a"}))
for e in mo_imports._expectations:
print((object.__getattribute__(e, "module"), object.__getattribute__(e, "name")))

Expand Down
14 changes: 6 additions & 8 deletions tests/test_athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,14 +188,12 @@ def test_issue_221_unpivot(self):
UNPIVOT(sales FOR quarter IN (Q1 as 'Q1_a', Q2 as 'Q2_a', Q3, Q4))"""
result = parse(sql)
expected = {
"from": [
"Produce",
{"unpivot": {
"for": "quarter",
"in": [{"name": "Q1_a", "value": "Q1"}, {"name": "Q2_a", "value": "Q2"}, "Q3", "Q4",],
"value": "sales",
}},
],
"from": "Produce",
"unpivot": {
"for": "quarter",
"in": [{"name": "Q1_a", "value": "Q1"}, {"name": "Q2_a", "value": "Q2"}, "Q3", "Q4",],
"value": "sales",
},
"select": {"all_columns": {}},
"with": {
"name": "Produce",
Expand Down
1 change: 0 additions & 1 deletion tests/test_big_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def test_large_expression(self):
with Timer("parse long expression"):
result = parse(crazy)


def test_issue_103b(self):
# 0 1 2 3 4 5 6 7 8 9
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
Expand Down
Loading

0 comments on commit 2e07bc7

Please sign in to comment.