From 9939e2d016265cd1f6cf8deb88f5e4fe0d87f6cd Mon Sep 17 00:00:00 2001 From: Steven Ayers Date: Tue, 30 Jul 2024 13:02:37 +0100 Subject: [PATCH] Support for ENCODE, DISTKEY and SORTKEY in Redshift (#245) --- mo_sql_parsing/sql_parser.py | 5 ++ mo_sql_parsing/types.py | 1 + tests/test_redshift.py | 152 +++++++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+) diff --git a/mo_sql_parsing/sql_parser.py b/mo_sql_parsing/sql_parser.py index 40c3eaa..50c35e6 100644 --- a/mo_sql_parsing/sql_parser.py +++ b/mo_sql_parsing/sql_parser.py @@ -704,6 +704,11 @@ def mult(tokens): ) + Optional(AS.suppress() + infix_notation(query, [])("query")) + Optional(CLUSTER_BY.suppress() + LB + delimited_list(identifier) + RB)("cluster_by") + + ZeroOrMore( + assign("sortkey", LB + delimited_list(identifier) + RB) + | assign("distkey", LB + identifier + RB) + ) + )("create table") definer = Optional(keyword("definer").suppress() + EQ + identifier("definer")) diff --git a/mo_sql_parsing/types.py b/mo_sql_parsing/types.py index 849a51d..a36871f 100644 --- a/mo_sql_parsing/types.py +++ b/mo_sql_parsing/types.py @@ -259,6 +259,7 @@ def get_column_type(expr, identifier, literal_string): | flag("auto_increment") | flag("autoincrement") | assign("comment", literal_string) + | assign("encode", identifier) | assign("character set", identifier) | assign("collate", Optional(EQ) + identifier) | flag("primary key") diff --git a/tests/test_redshift.py b/tests/test_redshift.py index 316f7ee..553ff6c 100644 --- a/tests/test_redshift.py +++ b/tests/test_redshift.py @@ -13,6 +13,158 @@ class TestRedshift(TestCase): + def test_issue245a_encode(self): + sql = f""" + CREATE TABLE IF NOT EXISTS web.customers + ( + customer_number INT ENCODE LZO, + metadata SUPER ENCODE LZO, + email VARCHAR(255) ENCODE LZO, + created_at TIMESTAMP ENCODE AZ64, + last_logged_in_at TIMESTAMP ENCODE AZ64 + ) + """ + result = parse(sql) + self.assertEqual( + result, { + "create table": { + "replace": False, + "name": "web.customers", + "columns": [ + {"name": "customer_number", "type": {"int": {}}, "encode": "LZO"}, + {"name": "metadata", "type": "SUPER", "encode": "LZO"}, + {"name": "email", "type": {"varchar": 255}, "encode": "LZO"}, + {"name": "created_at", "type": {"timestamp": {}}, "encode": "AZ64"}, + {"name": "last_logged_in_at", "type": {"timestamp": {}}, "encode": "AZ64"} + ] + } + } + ) + + def test_issue245b_single_sortkey(self): + sql = f""" + CREATE TABLE IF NOT EXISTS web.customers + ( + customer_number INT, + metadata SUPER, + email VARCHAR(255), + created_at TIMESTAMP, + last_logged_in_at TIMESTAMP + ) + SORTKEY (customer_number) + """ + result = parse(sql) + self.assertEqual( + result, { + "create table": { + "replace": False, + "name": "web.customers", + "columns": [ + {"name": "customer_number", "type": {"int": {}}}, + {"name": "metadata", "type": "SUPER"}, + {"name": "email", "type": {"varchar": 255}}, + {"name": "created_at", "type": {"timestamp": {}}}, + {"name": "last_logged_in_at", "type": {"timestamp": {}}} + ], + "sortkey": "customer_number" + } + } + ) + + def test_issue245c_multiple_sortkeys(self): + sql = f""" + CREATE TABLE IF NOT EXISTS web.customers + ( + customer_number INT, + metadata SUPER, + email VARCHAR(255), + created_at TIMESTAMP, + last_logged_in_at TIMESTAMP + ) + SORTKEY(customer_number, email) + """ + result = parse(sql) + self.assertEqual( + result, { + "create table": { + "replace": False, + "name": "web.customers", + "columns": [ + {"name": "customer_number", "type": {"int": {}}}, + {"name": "metadata", "type": "SUPER"}, + {"name": "email", "type": {"varchar": 255}}, + {"name": "created_at", "type": {"timestamp": {}}}, + {"name": "last_logged_in_at", "type": {"timestamp": {}}} + ], + "sortkey": [ + "customer_number", "email" + ] + } + } + ) + + def test_issue245d_distkey(self): + sql = f""" + CREATE TABLE IF NOT EXISTS web.customers + ( + customer_number INT, + metadata SUPER, + email VARCHAR(255), + created_at TIMESTAMP, + last_logged_in_at TIMESTAMP + ) + DISTKEY(customer_number) + """ + result = parse(sql) + self.assertEqual( + result, { + "create table": { + "replace": False, + "name": "web.customers", + "columns": [ + {"name": "customer_number", "type": {"int": {}}}, + {"name": "metadata", "type": "SUPER"}, + {"name": "email", "type": {"varchar": 255}}, + {"name": "created_at", "type": {"timestamp": {}}}, + {"name": "last_logged_in_at", "type": {"timestamp": {}}} + ], + "distkey": "customer_number" + } + } + ) + + def test_issue245e_combination(self): + sql = f""" + CREATE TABLE IF NOT EXISTS web.customers + ( + customer_number INT ENCODE LZO, + metadata SUPER ENCODE LZO, + email VARCHAR(255) ENCODE LZO, + created_at TIMESTAMP ENCODE AZ64, + last_logged_in_at TIMESTAMP ENCODE AZ64 + ) + DISTKEY (customer_number) + SORTKEY(created_at) + """ + result = parse(sql) + self.assertEqual( + result, { + "create table": { + "replace": False, + "name": "web.customers", + "columns": [ + {"name": "customer_number", "type": {"int": {}}, "encode": "LZO"}, + {"name": "metadata", "type": "SUPER", "encode": "LZO"}, + {"name": "email", "type": {"varchar": 255}, "encode": "LZO"}, + {"name": "created_at", "type": {"timestamp": {}}, "encode": "AZ64"}, + {"name": "last_logged_in_at", "type": {"timestamp": {}}, "encode": "AZ64"} + ], + "distkey": "customer_number", + "sortkey": "created_at" + } + } + ) + def test_issue149a_casting(self): # Ref: https://docs.aws.amazon.com/redshift/latest/dg/r_CAST_function.html#r_CAST_function-examples sql = "select '' :: varchar as placeholder from table"