Skip to content

Commit

Permalink
Merge pull request #251 from klahnakoski/issue-248
Browse files Browse the repository at this point in the history
Issue 248 - postgres regex operators
  • Loading branch information
klahnakoski authored Sep 8, 2024
2 parents 0809abf + 6572313 commit 2f807f3
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 8 deletions.
18 changes: 11 additions & 7 deletions mo_sql_parsing/keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
LIMIT = keyword("limit").suppress()
MINUS = keyword("minus")
NATURAL = keyword("natural")
NOT = keyword("not")
OFFSET = keyword("offset").suppress()
ON = keyword("on").suppress()
ORDER = keyword("order").suppress()
Expand Down Expand Up @@ -86,6 +87,7 @@
PRIMARY_KEY = Group(PRIMARY + KEY).set_parser_name("primary_key")
FOREIGN_KEY = Group(FOREIGN + KEY).set_parser_name("foreign_key")


# SIMPLE OPERATORS
CONCAT = Literal("||").set_parser_name("concat")
MUL = Literal("*").set_parser_name("mul")
Expand Down Expand Up @@ -122,7 +124,9 @@
# https://prestodb.io/docs/current/functions/comparison.html#is-distinct-from-and-is-not-distinct-from
keyword("is not distinct from").set_parser_name("ne!")
)
REGEXP = keyword("regexp").set_parser_name("rgx")
# PostgreSQL POSIX regex operators, accepted alongside the `regexp` keyword
# https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-POSIX-REGEXP
REGEXP = (keyword("regexp") | Literal("~")).set_parser_name("regexp")
# the `*` variants are mapped by utils.to_json_operator onto
# regexp / not_regexp calls carrying {"ignore_case": True}
REGEXP_I = Literal("~*").set_parser_name("regexp_i")
NOT_REGEXP_I = Literal("!~*").set_parser_name("not_regexp_i")
NEQ = (Literal("!=") | Literal("<>")).set_parser_name("neq")
ASSIGN = Literal(":=").set_parser_name("assign")

Expand All @@ -147,7 +151,6 @@
ELSE = keyword("else").suppress()
IN = keyword("in")
IS = keyword("is")
NOT = keyword("not")
OR = keyword("or")
LATERAL = keyword("lateral")
PIVOT = keyword("pivot")
Expand Down Expand Up @@ -185,7 +188,7 @@
NOT_LIKE = Group(NOT + LIKE).set_parser_name("not_like")
NOT_RLIKE = Group(NOT + RLIKE).set_parser_name("not_rlike")
NOT_IN = Group(NOT + IN).set_parser_name("nin")
NOT_REGEXP = Group(NOT + REGEXP).set_parser_name("not_regexp")
NOT_REGEXP = Group(NOT + keyword("regexp") | Literal("!~")).set_parser_name("not_regexp")
IS_NOT = Group(IS + NOT).set_parser_name("is_not")

_SIMILAR = keyword("similar")
Expand Down Expand Up @@ -324,8 +327,10 @@
"lt": 5,
"gt": 6,
"eq": 7,
"rgx": 7,
"not_rgx": 7,
"regexp": 7,
"not_regexp": 7,
"regexp_i": 7,
"not_regexp_i": 7,
"neq": 7,
"missing": 7,
"exists": 7,
Expand Down Expand Up @@ -409,8 +414,7 @@
AND,
OR,
ASSIGN,
REGEXP,
NOT_REGEXP,
NOT_REGEXP_I | NOT_REGEXP | REGEXP_I | REGEXP,
]

times = ["now", "today", "tomorrow", "eod"]
Expand Down
11 changes: 10 additions & 1 deletion mo_sql_parsing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import ast
import sys
from typing import Dict, List

from mo_dots import is_data, is_null, literal_field, unliteral_field
from mo_future import text, number_types, binary_type, flatten
Expand All @@ -22,7 +23,7 @@
class Call(object):
    """
    A parsed operator (or function) call: the operator name, its positional
    operands, and any named options attached to it.
    """

    __slots__ = ["op", "args", "kwargs"]

    def __init__(self, op, args: List, kwargs: Dict):
        """
        :param op: operator name, e.g. "regexp" or "exists"
        :param args: positional operands of the call
        :param kwargs: named options, e.g. {"ignore_case": True}; pass {} for none
        """
        # NOTE: `Dict` and `List` must both be imported from `typing`;
        # annotations are evaluated when this `def` statement runs
        self.op = op
        self.args = args
        self.kwargs = kwargs
Expand Down Expand Up @@ -169,6 +170,10 @@ def to_json_operator(tokens):
return Call("exists", tokens[0], {})
else:
return Call("missing", tokens[0], {})
elif op == "regexp_i":
return Call("regexp", [tokens[0], tokens[2]], {"ignore_case": True})
elif op == "not_regexp_i":
return Call("not_regexp", [tokens[0], tokens[2]], {"ignore_case": True})

operands = [tokens[0], tokens[2]]
binary_op = Call(op, operands, {})
Expand Down Expand Up @@ -264,6 +269,10 @@ def to_tuple_call(token, index, string):
"<=>": "eq!", # https://sparkbyexamples.com/apache-hive/hive-relational-arithmetic-logical-operators/
"!=": "neq",
"<>": "neq",
"!~*": "not_regexp_i",
"!~": "not_regexp",
"~*": "regexp_i",
"~": "regexp",
"not in": "nin",
"in": "in",
"is_not": "neq",
Expand Down
35 changes: 35 additions & 0 deletions tests/test_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,3 +544,38 @@ def test_issue_239_jsonb2(self):
"select": {"value": {"json_get_text": [{"cast": ["name", {"jsonb": {}}]}, {"literal": "field_key"}]}},
}
self.assertEqual(result, expected)

def test_issue_248_regex_operator1(self):
    # `~` is parsed as a plain `regexp` call
    # https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-POSIX-REGEXP
    sql = """SELECT 'abc' ~ 'abc'"""
    result = parse(sql)
    expected = {"select": {"value": {"regexp": [{"literal": "abc"}, {"literal": "abc"}]}}}
    # compare against the expectation; without this the test only checks
    # that parse() does not raise
    self.assertEqual(result, expected)

def test_issue_248_regex_operator2(self):
    # `~*` is parsed as `regexp` with the `ignore_case` flag set
    sql = """SELECT 'abc' ~* 'abc'"""
    # parse once and let any failure surface directly; the earlier
    # swallow-then-reparse-under-Debugger scaffold was a debugging leftover
    result = parse(sql)
    expected = {"select": {"value": {"regexp": [{"literal": "abc"}, {"literal": "abc"}], "ignore_case": True}}}
    self.assertEqual(result, expected)

def test_issue_248_regex_operator3(self):
    # `!~` is parsed as a `not_regexp` call
    sql = """SELECT 'abc' !~ 'abc'"""
    # parse once and let any failure surface directly; the earlier
    # swallow-then-reparse-under-Debugger scaffold was a debugging leftover
    result = parse(sql)
    expected = {"select": {"value": {"not_regexp": [{"literal": "abc"}, {"literal": "abc"}]}}}
    self.assertEqual(result, expected)

def test_issue_248_regex_operator4(self):
    # `!~*` is parsed as `not_regexp` with the `ignore_case` flag set
    query = """SELECT 'abc' !~* 'abc'"""
    abc = {"literal": "abc"}
    expected = {
        "select": {
            "value": {
                "not_regexp": [abc, abc],
                "ignore_case": True,
            }
        }
    }
    self.assertEqual(parse(query), expected)

0 comments on commit 2f807f3

Please sign in to comment.