release 11.655.24305

klahnakoski · Oct 31, 2024 · 2e07bc7 · 2e07bc7
2 parents 0aa026f + d0bee36
commit 2e07bc7
Show file tree

Hide file tree

Showing 22 changed files with 377 additions and 215 deletions.
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ The objective is to convert SQL queries to JSON-izable parse trees. This origina
 
 ## Project Status
 
-December 2023 -  I continue to resolve issues as they are raised. There are [over 1100 tests](https://app.travis-ci.com/github/klahnakoski/mo-sql-parsing), that cover most SQL for most databases, with limited DML and UDF support, including:
+October 2024 - I continue to resolve issues as they are raised. There are [over 1200 tests](https://app.travis-ci.com/github/klahnakoski/mo-sql-parsing) that cover most SQL for most databases, with limited DML and UDF support, including:
 
   * inner queries, 
   * with clauses, 
@@ -159,6 +159,40 @@ for select in listwrap(parsed_result.get('select')):
 
 ## Version Changes, Features
 
+### Version 11
+
+*October 2024*
+
+The `PIVOT` and `UNPIVOT` clauses have been promoted to top-level. Instead of being part of the joins found in the `FROM` clause, they are now siblings of `SELECT`.
+
+```
+>>> from mo_sql_parsing import parse
+>>> parse("SELECT * FROM table PIVOT (SUM(x) FOR y IN (1, 2, 3))")
+```
+
+now emits
+
+```
+{
+    'select': {'all_columns': {}}, 
+    'from': 'table', 
+    'pivot': {'sum': 'x', 'for': 'y', 'in': [1, 2, 3]}
+}
+```
+
+instead of
+
+```
+{
+    'select': {'all_columns': {}}, 
+    'from': [
+        'table', 
+        {'pivot': {'sum': 'x', 'for': 'y', 'in': [1, 2, 3]}}
+    ]
+}
+```
+
+
 
 ### Version 10
 

diff --git a/mo_sql_parsing/formatting.py b/mo_sql_parsing/formatting.py
@@ -14,7 +14,7 @@
 from mo_future import first, is_text, string_types, text
 from mo_parsing import listwrap
 
-from mo_sql_parsing.keywords import RESERVED, join_keywords, precedence
+from mo_sql_parsing.keywords import RESERVED, join_keywords, precedence, pivot_keywords
 from mo_sql_parsing.utils import binary_ops, is_set_op
 
 MAX_PRECEDENCE = 100
@@ -110,6 +110,8 @@ def isolate(expr, sql, prec):
     "select_distinct",
     "select",
     "from",
+    "pivot",
+    "unpivot",
     "where",
     "groupby",
     "having",
@@ -511,6 +513,28 @@ def _select_distinct(self, json, prec):
     def _distinct_on(self, json, prec):
         return "DISTINCT ON (" + ", ".join(self.dispatch(v) for v in listwrap(json)) + ")"
 
+    def pivot(self, json, prec):
+        pivot = json["pivot"]
+        return self._pivot("PIVOT", pivot, self.dispatch(pivot["aggregate"]))
+
+    def unpivot(self, json, prec):
+        pivot = json["unpivot"]
+        if "nulls" in pivot:
+            nulls = " INCLUDE NULLS" if pivot["nulls"] else " EXCLUDE NULLS"
+        else:
+            nulls = ""
+        return self._pivot(f"UNPIVOT{nulls}", pivot, self.dispatch(pivot["value"]))
+
+    def _pivot(self, op, pivot, value):
+        for_ = self.dispatch(pivot["for"])
+        in_ = self.dispatch(pivot["in"])
+        sql = f"{op} ({value} FOR {for_} IN {in_})"
+        if "name" in pivot:
+            name = pivot["name"]
+            return f"{sql} AS {name}"
+        else:
+            return sql
+
     def _join_on(self, json, prec):
         detected_join = join_keywords & set(json.keys())
         if len(detected_join) == 0:
@@ -614,11 +638,14 @@ def select(self, json, prec):
             if s == "*":
                 acc.append("*")
                 continue
-            all_col = s.get("all_columns")
-            if all_col or isinstance(all_col, dict):
-                acc.append(self.all_columns(s, precedence["select"]))
-            else:
+            if isinstance(s, str):
                 acc.append(self.dispatch(s, precedence["select"]))
+            else:
+                all_col = s.get("all_columns")
+                if all_col or isinstance(all_col, dict):
+                    acc.append(self.all_columns(s, precedence["select"]))
+                else:
+                    acc.append(self.dispatch(s, precedence["select"]))
         param = ", ".join(acc)
         if "top" in json:
             top = self.dispatch(json["top"])
@@ -651,7 +678,7 @@ def select_distinct(self, json, prec):
         return f"SELECT DISTINCT {param}"
 
     def from_(self, json, prec):
-        is_join = False
+        joiner = ", "
         from_ = json["from"]
         if isinstance(from_, dict) and "literal" in from_:
             content = ", ".join(self._literal(row) for row in from_["literal"])
@@ -664,11 +691,10 @@ def from_(self, json, prec):
         parts = []
         for v in from_:
             if join_keywords & set(v):
-                is_join = True
+                joiner = " "
                 parts.append(self._join_on(v, precedence["from"] - 1))
             else:
                 parts.append(self.dispatch(v, precedence["from"] - 1))
-        joiner = " " if is_join else ", "
         rest = joiner.join(parts)
         return f"FROM {rest}"
 

diff --git a/mo_sql_parsing/keywords.py b/mo_sql_parsing/keywords.py
@@ -275,6 +275,11 @@
     "outer apply",
 }
 
+pivot_keywords = {
+    "pivot",
+    "unpivot",
+}
+
 precedence = {
     # https://www.sqlite.org/lang_expr.html
     "literal": -1,
@@ -348,7 +353,9 @@
     "where": 33,
     "groupby": 34,
     "window": 35,
-    "having": 35,
+    "pivot": 36,
+    "unpivot": 36,
+    "having": 37,
     "union": 40,
     "union_all": 40,
     "except": 40,

diff --git a/mo_sql_parsing/sql_parser.py b/mo_sql_parsing/sql_parser.py
@@ -237,14 +237,14 @@ def matching(type):
             + RB
         ) / to_json_call
 
-        single_quote_name = Regex(r"\'(\'\'|[^'])*\'") / (lambda x: single_literal(x)['literal'])
+        single_quote_name = Regex(r"\'(\'\'|[^'])*\'") / (lambda x: single_literal(x)["literal"])
 
         alias = Optional((
             (
                 (
-                    AS + ((ident|single_quote_name)("name") + Optional(LB + delimited_list(ident("col")) + RB))
+                    AS + ((ident | single_quote_name)("name") + Optional(LB + delimited_list(ident("col")) + RB))
                     | (
-                          (identifier|single_quote_name)("name")
+                        (identifier | single_quote_name)("name")
                         + Optional((LB + delimited_list(ident("col")) + RB) | (AS + delimited_list(identifier("col"))))
                     )
                 )
@@ -403,7 +403,7 @@ def scale(tokens):
 
         table_source = Forward()
 
-        pivot_join = Group(assign(
+        pivot_join = assign(
             "pivot",
             (
                 LB
@@ -412,10 +412,10 @@ def scale(tokens):
                 + RB
                 + alias
             ),
-        ))
+        )
 
         # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#unpivot_operator
-        unpivot_join = Group(assign(
+        unpivot_join = assign(
             "unpivot",
             (
                 Optional(keyword("EXCLUDE NULLS")("nulls") / False | keyword("INCLUDE NULLS")("nulls") / True)
@@ -435,18 +435,14 @@ def scale(tokens):
                 + RB
                 + alias
             ),
-        ))
+        )
 
         join = Forward() / to_join_call
         join << (
-            pivot_join
-            | unpivot_join
-            | (
-                Group(joins)("op")
-                + table_source("join")
-                + Optional(Group(join)("child"))
-                + Optional((ON + expression("on")) | (USING + expression("using")))
-            )
+            Group(joins)("op")
+            + table_source("join")
+            + Optional(Group(join)("child"))
+            + Optional((ON + expression("on")) | (USING + expression("using")))
         )
 
         tops = (
@@ -498,6 +494,8 @@ def scale(tokens):
             + into
             + Optional((FROM + delimited_list(table_source) + ZeroOrMore(join))("from"))
             + Optional(WHERE + expression("where"))
+            + Optional(pivot_join)
+            + Optional(unpivot_join)
             + Optional(GROUP_BY + delimited_list(Group(named_column))("groupby"))
             + (
                 Optional(HAVING + expression("having"))
@@ -708,10 +706,8 @@ def mult(tokens):
             + Optional(AS.suppress() + infix_notation(query, [])("query"))
             + Optional(CLUSTER_BY.suppress() + LB + delimited_list(identifier) + RB)("cluster_by")
             + ZeroOrMore(
-                assign("sortkey", LB + delimited_list(identifier) + RB)
-                | assign("distkey", LB + identifier + RB)
+                assign("sortkey", LB + delimited_list(identifier) + RB) | assign("distkey", LB + identifier + RB)
             )
-
         )("create table")
 
         definer = Optional(keyword("definer").suppress() + EQ + identifier("definer"))

diff --git a/packaging/setup.py b/packaging/setup.py
diff --git a/packaging/setuptools.json b/packaging/setuptools.json
@@ -45,7 +45,7 @@
             "",
             "## Project Status",
             "",
-            "December 2023 -  I continue to resolve issues as they are raised. There are [over 1100 tests](https://app.travis-ci.com/github/klahnakoski/mo-sql-parsing), that cover most SQL for most databases, with limited DML and UDF support, including:",
+            "October 2024 - I continue to resolve issues as they are raised. There are [over 1200 tests](https://app.travis-ci.com/github/klahnakoski/mo-sql-parsing) that cover most SQL for most databases, with limited DML and UDF support, including:",
             "",
             "  * inner queries, ",
             "  * with clauses, ",
@@ -188,6 +188,40 @@
             "",
             "## Version Changes, Features",
             "",
+            "### Version 11",
+            "",
+            "*October 2024*",
+            "",
+            "The `PIVOT` and `UNPIVOT` clauses have been promoted to top-level. Instead of being part of the joins found in the `FROM` clause, they are now siblings of `SELECT`.",
+            "",
+            "```",
+            ">>> from mo_sql_parsing import parse",
+            ">>> parse(\"SELECT * FROM table PIVOT (SUM(x) FOR y IN (1, 2, 3))\")",
+            "```",
+            "",
+            "now emits",
+            "",
+            "```",
+            "{",
+            "    'select': {'all_columns': {}}, ",
+            "    'from': 'table', ",
+            "    'pivot': {'sum': 'x', 'for': 'y', 'in': [1, 2, 3]}",
+            "}",
+            "```",
+            "",
+            "instead of",
+            "",
+            "```",
+            "{",
+            "    'select': {'all_columns': {}}, ",
+            "    'from': [",
+            "        'table', ",
+            "        {'pivot': {'sum': 'x', 'for': 'y', 'in': [1, 2, 3]}}",
+            "    ]",
+            "}",
+            "```",
+            "",
+            "",
             "",
             "### Version 10",
             "",
@@ -312,6 +346,6 @@
     "name": "mo-sql-parsing",
     "packages": ["mo_sql_parsing"],
     "url": "https://github.com/klahnakoski/mo-sql-parsing",
-    "version": "10.657.24299",
+    "version": "11.655.24305",
     "zip_safe": true
 }
diff --git a/tests/mysql/products.py b/tests/mysql/products.py
@@ -116,5 +116,5 @@
         ]},
         "insert": "product",
     },
-    {"commit":{}},
+    {"commit": {}},
 ]
diff --git a/tests/smoke_test1.py b/tests/smoke_test1.py
@@ -12,7 +12,7 @@
 start_import = time()
 from mo_sql_parsing import parse, normal_op
 end_time = time()
-import_time = end_time-start_import
+import_time = end_time - start_import
 if import_time > 0.2:
     raise Exception(f"importing mo_sql_parsing took too long ({import_time} seconds)")
 
@@ -21,7 +21,7 @@
 start_run = time()
 result = parse(sql, calls=normal_op)
 end_run = time()
-run_time = end_run-start_run
+run_time = end_run - start_run
 
 print(result)
 print(f"done in {run_time} seconds")
diff --git a/tests/smoke_test2.py b/tests/smoke_test2.py
@@ -12,8 +12,9 @@
 # ensure first import is fast
 start_import = time()
 from mo_sql_parsing import format
+
 end_time = time()
-print(format({"from":"a"}))
+print(format({"from": "a"}))
 for e in mo_imports._expectations:
     print((object.__getattribute__(e, "module"), object.__getattribute__(e, "name")))
 

diff --git a/tests/test_athena.py b/tests/test_athena.py
@@ -188,14 +188,12 @@ def test_issue_221_unpivot(self):
         UNPIVOT(sales FOR quarter IN (Q1 as 'Q1_a', Q2 as 'Q2_a', Q3, Q4))"""
         result = parse(sql)
         expected = {
-            "from": [
-                "Produce",
-                {"unpivot": {
-                    "for": "quarter",
-                    "in": [{"name": "Q1_a", "value": "Q1"}, {"name": "Q2_a", "value": "Q2"}, "Q3", "Q4",],
-                    "value": "sales",
-                }},
-            ],
+            "from": "Produce",
+            "unpivot": {
+                "for": "quarter",
+                "in": [{"name": "Q1_a", "value": "Q1"}, {"name": "Q2_a", "value": "Q2"}, "Q3", "Q4",],
+                "value": "sales",
+            },
             "select": {"all_columns": {}},
             "with": {
                 "name": "Produce",

diff --git a/tests/test_big_sql.py b/tests/test_big_sql.py
@@ -40,7 +40,6 @@ def test_large_expression(self):
         with Timer("parse long expression"):
             result = parse(crazy)
 
-
     def test_issue_103b(self):
         #        0         1         2         3         4         5         6         7         8         9
         #        012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789