Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the semantics of parentheses #5

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 43 additions & 30 deletions nfa/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,17 @@ def pattern(pattern_string):
lexer = Lexer(pattern_string)
lexer.advance()
nfa_pair = NfaPair()
group(nfa_pair)
expr(nfa_pair)
# log_nfa(nfa_pair.start_node)

return nfa_pair.start_node


"""
group ::= ("(" expr ")")*
expr ::= factor_conn ("|" factor_conn)*
factor_conn ::= factor | factor factor*
factor ::= (term | term ("*" | "+" | "?"))*
term ::= char | "[" char "-" char "]" | .
term ::= char | "[" char "-" char "]" | . | "(" expr ")"
"""


Expand All @@ -44,6 +43,8 @@ def term(pair_out):
nfa_dot_char(pair_out)
elif lexer.match(Token.CCL_START):
nfa_set_nega_char(pair_out)
elif lexer.match(Token.OPEN_PAREN):
nfa_paren_around(pair_out)


# 匹配单个字符
Expand Down Expand Up @@ -147,7 +148,6 @@ def factor_conn(pair_out):

def is_conn(token):
nc = [
Token.OPEN_PAREN,
Token.CLOSE_PAREN,
Token.AT_EOL,
Token.EOS,
Expand Down Expand Up @@ -226,6 +226,19 @@ def nfa_option_closure(pair_out):
return True


# ()
# Parse a parenthesised sub-expression: "(" expr ")".
def nfa_paren_around(pair_out):
    """Build the NFA for a "(" expr ")" group into *pair_out*.

    Returns True when both parentheses are matched and the inner
    expression has been parsed; False otherwise (mirroring the
    return-False error convention used by the other nfa_* parsers).
    """
    matched = lexer.match(Token.OPEN_PAREN)
    if matched:
        lexer.advance()                      # consume "("
        expr(pair_out)                       # inner expression NFA -> pair_out
        matched = lexer.match(Token.CLOSE_PAREN)
        if matched:
            lexer.advance()                  # consume ")"
    return matched


def expr(pair_out):
factor_conn(pair_out)
pair = NfaPair()
Expand All @@ -246,32 +259,32 @@ def expr(pair_out):
return True


# Legacy top-level parser for: group ::= ("(" expr ")")*
# Superseded in this change by treating "(" expr ")" as a term
# (nfa_paren_around); retained here on the removed side of the diff.
def group(pair_out):
    """Parse a sequence of (possibly parenthesised) expressions into *pair_out*.

    NOTE(review): reads the shared `lexer` (presumably the module-level one
    initialised in pattern()); returns False when end-of-stream is reached.
    """
    # First unit: either "(" expr ")" or a bare expr; immediate EOS means
    # there is nothing to parse at all.
    if lexer.match(Token.OPEN_PAREN):
        lexer.advance()
        expr(pair_out)
        if lexer.match(Token.CLOSE_PAREN):
            lexer.advance()
    elif lexer.match(Token.EOS):
        return False
    else:
        expr(pair_out)

    # Remaining units are concatenated: each new sub-NFA is chained onto the
    # current end node of the machine built so far.
    while True:
        pair = NfaPair()
        if lexer.match(Token.OPEN_PAREN):
            lexer.advance()
            expr(pair)
            pair_out.end_node.next_1 = pair.start_node  # chain new sub-NFA
            pair_out.end_node = pair.end_node
            if lexer.match(Token.CLOSE_PAREN):
                lexer.advance()
            elif lexer.match(Token.EOS):
                # EOS inside/after a group is the loop's only explicit exit.
                return False
        else:
            # NOTE(review): EOS is not re-checked on this path — termination
            # relies on expr()'s behavior at end-of-stream; confirm upstream.
            expr(pair)
            pair_out.end_node.next_1 = pair.start_node
            pair_out.end_node = pair.end_node
# def group(pair_out):
# if lexer.match(Token.OPEN_PAREN):
# lexer.advance()
# expr(pair_out)
# if lexer.match(Token.CLOSE_PAREN):
# lexer.advance()
# elif lexer.match(Token.EOS):
# return False
# else:
# expr(pair_out)

# while True:
# pair = NfaPair()
# if lexer.match(Token.OPEN_PAREN):
# lexer.advance()
# expr(pair)
# pair_out.end_node.next_1 = pair.start_node
# pair_out.end_node = pair.end_node
# if lexer.match(Token.CLOSE_PAREN):
# lexer.advance()
# elif lexer.match(Token.EOS):
# return False
# else:
# expr(pair)
# pair_out.end_node.next_1 = pair.start_node
# pair_out.end_node = pair.end_node



7 changes: 7 additions & 0 deletions test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ def __init__(self, str, pattern, result):
# Test fixtures: (input string, pattern, expected match result).
_paren_cases = [
    ("abbbbb", "[^c]+", True),
    ("ccccc", "[^c]+", False),
    ("123", "[1-3]+", True),
    ("ad", "a(bc)*d", True),
    ("abcd", "a(bc)*d", True),
    ("abcbcd", "a(bc)*d", True),
    ("abcdef", "a(b(cd)*e)?f", True),
    ("abef", "a(b(cd)*e)?f", True),
    ("af", "a(b(cd)*e)?f", True),
    ("abf", "a(b(cd)*e)?f", False),
]
testLists.extend(RegexMaterial(s, p, ok) for s, p, ok in _paren_cases)

class TestRegex(unittest.TestCase):
def test(self):
Expand Down