diff --git a/mo_sql_parsing/sql_parser.py b/mo_sql_parsing/sql_parser.py index 40c3eaa..50c35e6 100644 --- a/mo_sql_parsing/sql_parser.py +++ b/mo_sql_parsing/sql_parser.py @@ -704,6 +704,11 @@ def mult(tokens): ) + Optional(AS.suppress() + infix_notation(query, [])("query")) + Optional(CLUSTER_BY.suppress() + LB + delimited_list(identifier) + RB)("cluster_by") + + ZeroOrMore( + assign("sortkey", LB + delimited_list(identifier) + RB) + | assign("distkey", LB + identifier + RB) + ) + )("create table") definer = Optional(keyword("definer").suppress() + EQ + identifier("definer")) diff --git a/mo_sql_parsing/types.py b/mo_sql_parsing/types.py index 849a51d..a36871f 100644 --- a/mo_sql_parsing/types.py +++ b/mo_sql_parsing/types.py @@ -259,6 +259,7 @@ def get_column_type(expr, identifier, literal_string): | flag("auto_increment") | flag("autoincrement") | assign("comment", literal_string) + | assign("encode", identifier) | assign("character set", identifier) | assign("collate", Optional(EQ) + identifier) | flag("primary key") diff --git a/mo_sql_parsing/utils.py b/mo_sql_parsing/utils.py index 620174d..8439e03 100644 --- a/mo_sql_parsing/utils.py +++ b/mo_sql_parsing/utils.py @@ -908,8 +908,9 @@ def no_dashes(tokens, start, string): digit = Char("0123456789") with whitespaces.NO_WHITESPACE: - ident_w_dash = Char(FIRST_IDENT_CHAR) + (Regex("(?<=[^ 0-9])\\-(?=[^ 0-9])") | Char(IDENT_CHAR))[...] - ident_w_dash_warning = Regex(ident_w_dash.__regex__()[1]).set_parser_name("identifier_with_dashes") / no_dashes + # repack the expression into a regex for faster parsing ident_w_dash + ident_w_dash = Regex((Char(FIRST_IDENT_CHAR) + (Regex("(?<=[^ 0-9])\\-(?=[^ 0-9])") | Char(IDENT_CHAR))[...]).__regex__()[1]) + ident_w_dash_warning = ident_w_dash.set_parser_name("identifier_with_dashes") / no_dashes simple_ident = Word(FIRST_IDENT_CHAR, IDENT_CHAR).set_parser_name("identifier") sqlserver_local_ident = Word("@" + FIRST_IDENT_CHAR, IDENT_CHAR).set_parser_name("identifier") diff --git a/packaging/setup.py b/packaging/setup.py index e2aac6f..1ff2ed8 100644 --- a/packaging/setup.py +++ b/packaging/setup.py @@ -15,6 +15,6 @@ name='mo-sql-parsing', packages=["mo_sql_parsing"], url='https://github.com/klahnakoski/mo-sql-parsing', - version='10.651.24172', + version='10.652.24214', zip_safe=True ) \ No newline at end of file diff --git a/packaging/setuptools.json b/packaging/setuptools.json index 183f249..1bb1afb 100644 --- a/packaging/setuptools.json +++ b/packaging/setuptools.json @@ -312,6 +312,6 @@ "name": "mo-sql-parsing", "packages": ["mo_sql_parsing"], "url": "https://github.com/klahnakoski/mo-sql-parsing", - "version": "10.651.24172", + "version": "10.652.24214", "zip_safe": true } \ No newline at end of file diff --git a/tests/test_redshift.py b/tests/test_redshift.py index 316f7ee..553ff6c 100644 --- a/tests/test_redshift.py +++ b/tests/test_redshift.py @@ -13,6 +13,158 @@ class TestRedshift(TestCase): + def test_issue245a_encode(self): + sql = f""" + CREATE TABLE IF NOT EXISTS web.customers + ( + customer_number INT ENCODE LZO, + metadata SUPER ENCODE LZO, + email VARCHAR(255) ENCODE LZO, + created_at TIMESTAMP ENCODE AZ64, + last_logged_in_at TIMESTAMP ENCODE AZ64 + ) + """ + result = parse(sql) + self.assertEqual( + result, { + "create table": { + "replace": False, + "name": "web.customers", + "columns": [ + {"name": "customer_number", "type": {"int": {}}, "encode": "LZO"}, + {"name": "metadata", "type": "SUPER", "encode": "LZO"}, + {"name": "email", "type": {"varchar": 255}, "encode": "LZO"}, + {"name": "created_at", "type": {"timestamp": {}}, "encode": "AZ64"}, + {"name": "last_logged_in_at", "type": {"timestamp": {}}, "encode": "AZ64"} + ] + } + } + ) + + def test_issue245b_single_sortkey(self): + sql = f""" + CREATE TABLE IF NOT EXISTS web.customers + ( + customer_number INT, + metadata SUPER, + email VARCHAR(255), + created_at TIMESTAMP, + last_logged_in_at TIMESTAMP + ) + SORTKEY (customer_number) + """ + result = parse(sql) + self.assertEqual( + result, { + "create table": { + "replace": False, + "name": "web.customers", + "columns": [ + {"name": "customer_number", "type": {"int": {}}}, + {"name": "metadata", "type": "SUPER"}, + {"name": "email", "type": {"varchar": 255}}, + {"name": "created_at", "type": {"timestamp": {}}}, + {"name": "last_logged_in_at", "type": {"timestamp": {}}} + ], + "sortkey": "customer_number" + } + } + ) + + def test_issue245c_multiple_sortkeys(self): + sql = f""" + CREATE TABLE IF NOT EXISTS web.customers + ( + customer_number INT, + metadata SUPER, + email VARCHAR(255), + created_at TIMESTAMP, + last_logged_in_at TIMESTAMP + ) + SORTKEY(customer_number, email) + """ + result = parse(sql) + self.assertEqual( + result, { + "create table": { + "replace": False, + "name": "web.customers", + "columns": [ + {"name": "customer_number", "type": {"int": {}}}, + {"name": "metadata", "type": "SUPER"}, + {"name": "email", "type": {"varchar": 255}}, + {"name": "created_at", "type": {"timestamp": {}}}, + {"name": "last_logged_in_at", "type": {"timestamp": {}}} + ], + "sortkey": [ + "customer_number", "email" + ] + } + } + ) + + def test_issue245d_distkey(self): + sql = f""" + CREATE TABLE IF NOT EXISTS web.customers + ( + customer_number INT, + metadata SUPER, + email VARCHAR(255), + created_at TIMESTAMP, + last_logged_in_at TIMESTAMP + ) + DISTKEY(customer_number) + """ + result = parse(sql) + self.assertEqual( + result, { + "create table": { + "replace": False, + "name": "web.customers", + "columns": [ + {"name": "customer_number", "type": {"int": {}}}, + {"name": "metadata", "type": "SUPER"}, + {"name": "email", "type": {"varchar": 255}}, + {"name": "created_at", "type": {"timestamp": {}}}, + {"name": "last_logged_in_at", "type": {"timestamp": {}}} + ], + "distkey": "customer_number" + } + } + ) + + def test_issue245e_combination(self): + sql = f""" + CREATE TABLE IF NOT EXISTS web.customers + ( + customer_number INT ENCODE LZO, + metadata SUPER ENCODE LZO, + email VARCHAR(255) ENCODE LZO, + created_at TIMESTAMP ENCODE AZ64, + last_logged_in_at TIMESTAMP ENCODE AZ64 + ) + DISTKEY (customer_number) + SORTKEY(created_at) + """ + result = parse(sql) + self.assertEqual( + result, { + "create table": { + "replace": False, + "name": "web.customers", + "columns": [ + {"name": "customer_number", "type": {"int": {}}, "encode": "LZO"}, + {"name": "metadata", "type": "SUPER", "encode": "LZO"}, + {"name": "email", "type": {"varchar": 255}, "encode": "LZO"}, + {"name": "created_at", "type": {"timestamp": {}}, "encode": "AZ64"}, + {"name": "last_logged_in_at", "type": {"timestamp": {}}, "encode": "AZ64"} + ], + "distkey": "customer_number", + "sortkey": "created_at" + } + } + ) + def test_issue149a_casting(self): # Ref: https://docs.aws.amazon.com/redshift/latest/dg/r_CAST_function.html#r_CAST_function-examples sql = "select '' :: varchar as placeholder from table"