From f58d4cca4f11e2628ce87df08054ba65dd245c75 Mon Sep 17 00:00:00 2001 From: Igor Dejanovic Date: Tue, 20 Feb 2024 17:11:23 +0100 Subject: [PATCH 1/2] refc: cleanup of grammar module --- parglare/grammar.py | 135 ++++++++++++++++++++++---------------------- 1 file changed, 69 insertions(+), 66 deletions(-) diff --git a/parglare/grammar.py b/parglare/grammar.py index ce300b3..d2041c8 100644 --- a/parglare/grammar.py +++ b/parglare/grammar.py @@ -3,6 +3,8 @@ import re from collections import Counter from os import path +from typing import List, Dict, Optional, Callable +from dataclasses import dataclass, field from parglare import termui from parglare.actions import collect, collect_sep, pass_none, pass_single @@ -64,16 +66,14 @@ def __init__(self, name, location=None, imported_with=None, def fqn(self): if self.imported_with: return f"{self.imported_with.fqn}.{self.name}" - else: - return self.name + return self.name @property def action_fqn(self): if self.action_name: if self.imported_with: return f"{self.imported_with.fqn}.{self.action_name}" - else: - return self.action_name + return self.action_name def add_user_meta_data(self, name, value): if self.user_meta is None: @@ -109,8 +109,7 @@ class NonTerminal(GrammarSymbol): """ def __init__(self, name, productions=None, location=None, imported_with=None, user_meta=None): - super().__init__(name, location, imported_with, - user_meta) + super().__init__(name, location, imported_with, user_meta) self.productions = productions if productions is not None else [] @@ -142,8 +141,7 @@ def __init__(self, name, recognizer=None, location=None, self.prefer = False self.dynamic = False self.keyword = False - super().__init__(name, location, imported_with, - user_meta=None) + super().__init__(name, location, imported_with, user_meta=None) @property def recognizer(self): @@ -324,8 +322,7 @@ def __str__(self): if hasattr(self, 'prod_id'): return (s_header("%d:") + " %s " + s_emph("=") + " %s") % (self.prod_id, self.symbol, 
self.rhs) - else: - return ("%s " + s_emph("=") + " %s") % (self.symbol, self.rhs) + return ("%s " + s_emph("=") + " %s") % (self.symbol, self.rhs) def __repr__(self): return f'Production({str(self)})' @@ -404,6 +401,24 @@ def __init__(self, name, multiplicity, type_name): self.type_name = type_name +@dataclass +class GrammarContext: + """ + Context used to collect grammar information and provide info to actions + during grammar parsing. + + """ + classes: Dict = field(default_factory=dict) + debug: bool = False + debug_colors: bool = False + re_flags: re.RegexFlag = re.MULTILINE + groups: List = field(default_factory=list) + groups_counter: Counter = field(default_factory=Counter) + ignore_case: bool = False + imported_with: Optional['PGFileImport'] = None + inline_terminals: Dict = field(default_factory=dict) + + class PGFile: """Objects of this class represent parglare grammar files. @@ -442,8 +457,9 @@ class PGFile: recognizers (dict of callables): A dict of Python callables used as a terminal recognizers. """ - def __init__(self, productions, terminals=None, classes=None, imports=None, - file_path=None, grammar=None, recognizers=None, + def __init__(self, productions: List[Production], + terminals: Optional[List[Terminal]] = None, classes=None, + imports=None, file_path=None, grammar=None, recognizers=None, imported_with=None): self.productions = productions self.terminals = terminals @@ -453,7 +469,7 @@ def __init__(self, productions, terminals=None, classes=None, imports=None, self.file_path = path.realpath(file_path) if file_path else None self.imported_with = imported_with self.recognizers = recognizers - self.actions = {} + self.actions: Dict[str, Callable] = {} self.collect_and_unify_symbols() @@ -530,11 +546,11 @@ def collect_and_unify_symbols(self): # Check grammar actions for rules/symbols. 
if new_symbol.action_name and \ - new_symbol.action_name != old_symbol.action_name: - raise GrammarError( - location=new_symbol.location, - message='Multiple different grammar actions ' - f'for rule "{new_symbol.name}".') + new_symbol.action_name != old_symbol.action_name: + raise GrammarError( + location=new_symbol.location, + message='Multiple different grammar actions ' + f'for rule "{new_symbol.name}".') self.nonterminals = nonterminals_by_name self.symbols_by_name = dict(nonterminals_by_name) @@ -545,7 +561,7 @@ def collect_and_unify_symbols(self): self.symbols_by_name['STOP'] = STOP def resolve_references(self): - # Two pass resolving to enable referening symbols created during + # Two pass resolving to enable referring symbols created during # resolving (e.g. multiplicity symbols). for pazz in [True, False]: for production in self.productions: @@ -645,8 +661,7 @@ def resolve_ref(self, symbol_ref, first_pass=False): mult = symbol_ref.multiplicity if mult != MULT_ONE: # If multiplicity is used than we are referring to - # suggared symbol - + # sugared symbol separator = symbol_ref.separator \ if symbol_ref.separator else None @@ -809,9 +824,7 @@ class Grammar(PGFile): def __init__(self, productions=None, terminals=None, classes=None, imports=None, file_path=None, recognizers=None, - start_symbol=None, _no_check_recognizers=False, - re_flags=re.MULTILINE, ignore_case=False, debug=False, - debug_parse=False, debug_colors=False): + start_symbol=None, _no_check_recognizers=False): """ Grammar constructor is not meant to be called directly by the user. See `from_str` and `from_file` static methods instead. 
@@ -825,12 +838,12 @@ def __init__(self, productions=None, terminals=None, self.imported_files = {} super().__init__(productions=productions, - terminals=terminals, - classes=classes, - imports=imports, - file_path=file_path, - grammar=self, - recognizers=recognizers) + terminals=terminals, + classes=classes, + imports=imports, + file_path=file_path, + grammar=self, + recognizers=recognizers) self._no_check_recognizers = _no_check_recognizers @@ -861,7 +874,7 @@ def _init_grammar(self): 0, Production(AUGSYMBOL, ProductionRHS([self.start_symbol, STOP]))) - self._add_all_symbols_productions() + self._add_all_production_symbols() self._enumerate_productions() self._fix_keyword_terminals() self._resolve_actions() @@ -870,7 +883,7 @@ def _init_grammar(self): if not self._no_check_recognizers: self._connect_override_recognizers() - def _add_all_symbols_productions(self): + def _add_all_production_symbols(self): self.nonterminals = {} for prod in self.productions: @@ -891,6 +904,9 @@ def add_productions(productions): production.rhs[idx] = self.terminals[rhs_elem.fqn] elif isinstance(rhs_elem, NonTerminal): if rhs_elem.fqn not in self.nonterminals: + # This may happen for RHS refs that create new + # productions (e.g. syntactic sugar extensions - *, + # +...) 
self.productions.extend(rhs_elem.productions) add_productions(rhs_elem.productions) else: @@ -1081,17 +1097,8 @@ def _parse(parse_fun_name, what_to_parse, recognizers=None, ignore_case=False, re_flags=re.MULTILINE, debug=False, debug_parse=False, debug_colors=False, _no_check_recognizers=False): - extra = GrammarContext() - extra.re_flags = re_flags - extra.ignore_case = ignore_case - extra.debug = debug - extra.debug_colors = debug_colors - extra.classes = {} - extra.inline_terminals = {} - extra.groups = [] - extra.groups_counter = Counter() - extra.imported_with = None - extra.grammar = None + extra = GrammarContext(debug=debug, debug_colors=debug_colors, + ignore_case=ignore_case, re_flags=re_flags) grammar_parser = get_grammar_parser(debug_parse, debug_colors) imports, productions, terminals, classes = \ getattr(grammar_parser, parse_fun_name)(what_to_parse, @@ -1139,28 +1146,28 @@ class PGFileImport: module_name (str): Name of this import. By default is the name of grammar file without .pg extension. file_path (str): A canonical full path of the imported .pg file. - extra: grammar parsing extra state. - imported_with (PGFileImport): First import this import is imported from. - Used for FQN calculation. - grammar (Grammar): Grammar object under construction. + context: grammar parsing context state. + imported_with (PGFileImport | None): First import this import is + imported from. Used for FQN calculation. + grammar (Grammar | None): Grammar object under construction. 
pgfile (PGFile instance or None): """ - def __init__(self, module_name, file_path, extra): + def __init__(self, module_name: str, file_path: str, + context: GrammarContext): self.module_name = module_name - self.file_path = file_path - self.extra = extra - self.imported_with = extra.imported_with - self.grammar = None - self.pgfile = None + self.file_path: str = file_path + self.context = context + self.imported_with: Optional[PGFileImport] = context.imported_with + self.grammar: Optional[Grammar] = None + self.pgfile: Optional[PGFile] = None @property def fqn(self): "A fully qualified name of the import following the first import path." if self.imported_with: return f"{self.imported_with.fqn}.{self.module_name}" - else: - return self.module_name + return self.module_name def load_pgfile(self): if self.pgfile is None: @@ -1169,15 +1176,15 @@ def load_pgfile(self): self.pgfile = self.grammar.imported_files[self.file_path] else: # If not found construct new PGFile - extra = copy.copy(self.extra) - extra.file_name = self.file_path - extra.inline_terminals = {} - extra.imported_with = self + context = copy.copy(self.context) + context.file_name = self.file_path + context.inline_terminals = {} + context.imported_with = self imports, productions, terminals, classes = \ get_grammar_parser( - self.extra.debug, - self.extra.debug_colors).parse_file( - self.file_path, extra=extra) + self.context.debug, + self.context.debug_colors).parse_file( + self.file_path, extra=context) self.pgfile = PGFile(productions=productions, terminals=terminals, classes=classes, @@ -1268,10 +1275,6 @@ def check_name(context, name): message=f'Using dot in names is not allowed ("{name}").') -class GrammarContext: - pass - - # Grammar for grammars (PGFILE, From 32930ad59ce8e3b2c133192fed03a4cd6dd7e7e6 Mon Sep 17 00:00:00 2001 From: Igor Dejanovic Date: Wed, 14 Feb 2024 20:39:29 +0100 Subject: [PATCH 2/2] feat: override of imported grammar rules --- docs/grammar_modularization.md | 36 ++ 
parglare/common.py | 42 +- parglare/glr.py | 2 +- parglare/grammar.py | 439 +++++++++++--------- parglare/parser.py | 2 +- tests/func/import/override/base.pg | 11 + tests/func/import/override/first.pg | 6 + tests/func/import/override/nonexisting.pg | 5 + tests/func/import/override/second.pg | 5 + tests/func/import/override/test_override.py | 39 ++ 10 files changed, 354 insertions(+), 233 deletions(-) create mode 100644 tests/func/import/override/base.pg create mode 100644 tests/func/import/override/first.pg create mode 100644 tests/func/import/override/nonexisting.pg create mode 100644 tests/func/import/override/second.pg create mode 100644 tests/func/import/override/test_override.py diff --git a/docs/grammar_modularization.md b/docs/grammar_modularization.md index 822b9b8..5a8fbf1 100644 --- a/docs/grammar_modularization.md +++ b/docs/grammar_modularization.md @@ -44,6 +44,42 @@ tests](https://github.com/igordejanovic/parglare/blob/master/tests/func/import/f for an example. +## Imported rules override + +Grammar rules defined in an imported grammar can be overridden by using the appropriate +FQN rule name. For example, if we have the following grammars: + +file: `first.pg` +``` +import 'second.pg' as s; +... + +``` + +file: `second.pg` +``` +import 'third.pg' as t; +... +``` + +file: `third.pg` +``` +S: A B+; +B: 'foo' C; +``` + +In `first.pg` we could then write: +``` +S: s.t.S; +s.t.B: 'bar' C+; +``` +This will effectively override rule `B` from `third.pg` (note the FQN `s.t.B`). + +All grammar rules that used the old `B` from `third.pg` will now use the `s.t.B` rule +from `first.pg`. This enables flexible modification of existing grammars from +importing grammars, just in the places where it is needed, without having to rewrite +all the rules that use the rule that needs to be modified.
+ ## Grammar file recognizers diff --git a/parglare/common.py b/parglare/common.py index d918e00..179d4fc 100644 --- a/parglare/common.py +++ b/parglare/common.py @@ -21,14 +21,14 @@ class Location: end and input_str. """ - __slots__ = ['context', 'file_name', - '_line', '_column', - '_line_end', '_column_end'] + __slots__ = ['start_position', 'end_position', 'input_str', 'file_name', + '_line', '_column', '_line_end', '_column_end'] def __init__(self, context=None, file_name=None): - - self.context = context - self.file_name = file_name or context.file_name + self.start_position = context.start_position if context else None + self.end_position = context.end_position if context else None + self.input_str = context.input_str if context else None + self.file_name = file_name or context.file_name if context else None # Evaluate this only when string representation is needed. # E.g. during error reporting @@ -63,40 +63,26 @@ def column_end(self): return self._column_end def evaluate_line_col(self): - context = self.context self._line, self._column = pos_to_line_col( - context.input_str, context.start_position) + self.input_str, self.start_position) def evaluate_line_col_end(self): - context = self.context - if hasattr(context, 'end_position') \ - and context.end_position: + if self.end_position: self._line_end, self._column_end = \ - pos_to_line_col(context.input_str, context.end_position) - - def __getattr__(self, name): - if self.context is not None: - return getattr(self.context, name) - else: - raise AttributeError(name) + pos_to_line_col(self.input_str, self.end_position) def __str__(self): - if self.context is None: - line, column = None, None - else: - line, column = self.line, self.column - context = self.context + line, column = self.line, self.column if line is not None: return ('{}{}:{}:"{}"' .format(f"{self.file_name}:" if self.file_name else "", line, column, - position_context(context.input_str, - context.start_position))) - elif self.file_name: + 
position_context(self.input_str, + self.start_position))) + if self.file_name: return _a(self.file_name) - else: - return "" + return "" def __repr__(self): return str(self) diff --git a/parglare/glr.py b/parglare/glr.py index c466736..7bec15b 100644 --- a/parglare/glr.py +++ b/parglare/glr.py @@ -511,7 +511,7 @@ def _do_error_recovery(self): successful = self.error_recovery(head, error) if successful: - error.location.context.end_position = head.position + error.location.end_position = head.position if debug: a_print("New position is ", pos_to_line_col(input_str, head.position), diff --git a/parglare/grammar.py b/parglare/grammar.py index d2041c8..76d5e3e 100644 --- a/parglare/grammar.py +++ b/parglare/grammar.py @@ -2,9 +2,9 @@ import itertools import re from collections import Counter -from os import path -from typing import List, Dict, Optional, Callable from dataclasses import dataclass, field +from os import path +from typing import Callable, Dict, List, Optional from parglare import termui from parglare.actions import collect, collect_sep, pass_none, pass_single @@ -168,25 +168,33 @@ class Reference: separator (symbol or Reference): A reference to the separator symbol or the separator symbol itself if resolved. """ - def __init__(self, location, name): + def __init__(self, location: Location, name: str, + imported_with: 'PGFileImport'): self.name = name self.location = location + self.imported_with = imported_with self.multiplicity = MULT_ONE self.greedy = False self.separator = None @property - def multiplicity_name(self): + def multiplicity_fqn(self): """ Returns the name of the symbol that should be used if multiplicity/separator is used. 
""" - return make_multiplicity_name( - self.name, self.multiplicity, + return make_multiplicity_fqn( + self.fqn, self.multiplicity, self.separator.name if self.separator else None) + @property + def fqn(self): + if self.imported_with: + return f"{self.imported_with.fqn}.{self.name}" + return self.name + def clone(self): - new_ref = Reference(self.location, self.name) + new_ref = Reference(self.location, self.name, self.imported_with) new_ref.multiplicity = self.multiplicity new_ref.separator = self.separator return new_ref @@ -441,7 +449,7 @@ class PGFile: Actions are by default loaded from the file named `_actions.py` where `grammar` is basename of grammar file. Recognizers are loaded from `_recognizers.py`. Actions and recognizers given this way are both - optional. Furthermore, both actions and recognizers can be overriden by + optional. Furthermore, both actions and recognizers can be overridden by supplying actions and/or recognizers dict during grammar/parser instantiation. @@ -471,7 +479,7 @@ def __init__(self, productions: List[Production], self.recognizers = recognizers self.actions: Dict[str, Callable] = {} - self.collect_and_unify_symbols() + self._make_symbols_resolution_map() if self.file_path: self.grammar.imported_files[self.file_path] = self @@ -489,15 +497,14 @@ def __init__(self, productions: List[Production], else: self.imports = {} - self.resolve_references() - self.load_actions() - self.load_recognizers() - - def collect_and_unify_symbols(self): - """Collect non-terminals and terminals (both explicit and implicit/inline) - defined in this file and make sure there is only one instance for each - of them. + self._check_overrides() + self._load_actions() + self._load_recognizers() + def _make_symbols_resolution_map(self): + """ + Collect non-terminals and terminals and make dicts for resolving + by name. 
""" nonterminals_by_name = {} terminals_by_name = {} @@ -509,7 +516,8 @@ def collect_and_unify_symbols(self): if terminal.name in terminals_by_name: raise GrammarError( location=terminal.location, - message=f'Multiple definitions of terminal rule "{terminal.name}"') + message=f'Multiple definitions of terminal ' + f'rule "{terminal.name}"') if isinstance(terminal.recognizer, StringRecognizer): rec = terminal.recognizer if rec.value in terminals_by_str_rec: @@ -523,7 +531,6 @@ def collect_and_unify_symbols(self): terminals_by_name[terminal.name] = terminal self.terminals = terminals_by_name - self.terminals_by_str_rec = terminals_by_str_rec # Collect non-terminals for production in self.productions: @@ -560,19 +567,31 @@ def collect_and_unify_symbols(self): self.symbols_by_name['EMPTY'] = EMPTY self.symbols_by_name['STOP'] = STOP - def resolve_references(self): - # Two pass resolving to enable referring symbols created during - # resolving (e.g. multiplicity symbols). - for pazz in [True, False]: - for production in self.productions: - for idx, ref in enumerate(production.rhs): - if isinstance(ref, Reference): - production.rhs[idx] = self.resolve_ref(ref, pazz) - - def register_symbol(self, symbol): - self.symbols_by_name[symbol.name] = symbol + def _check_overrides(self): + """ + Check that all overrides defined in the current file are + valid FQNs. Just to be sure that typos don't go unnoticed. + """ + for symbol_fqn, symbol in self.symbols_by_name.items(): + # Must resolve first level without resolve_symbol_by_name + # as otherwise the override rule itself would be found. + if '.' in symbol_fqn: + import_module_name, name = symbol_fqn.split('.', 1) + try: + imported_pg_file = self.imports[import_module_name] + if not imported_pg_file.resolve_symbol_by_name(name): + raise GrammarError( + location=symbol.location, + message=f"Unexisting name for symbol " + f"override {symbol_fqn}." 
+ ) + except KeyError as ex_inner: + raise GrammarError( + location=symbol.location, + message=f'Unexisting module "{import_module_name}"' + f' in reference "{symbol_fqn}"') from ex_inner - def load_actions(self): + def _load_actions(self): """ Loads actions from _actions.py if the file exists. Actions must be collected with action decorator and the decorator must @@ -596,10 +615,10 @@ def load_actions(self): 'decorator defined.') self.actions = actions_module.action.all - def load_recognizers(self): - """Load recognizers from _recognizers.py. Override + def _load_recognizers(self): + """ + Load recognizers from _recognizers.py. Override with provided recognizers. - """ if self.file_path: recognizers_file = path.join( @@ -632,31 +651,179 @@ def load_recognizers(self): .format(recognizer_name)) symbol.recognizer = recognizer - def resolve_ref(self, symbol_ref, first_pass=False): + def resolve_symbol_by_name( + self, symbol_fqn: str, + location: Optional[Location] = None) -> Optional[GrammarSymbol]: + """ + Resolve symbol by FQN. Respect overrides. + """ + try: + # Try to get local symbol by FQN in order to override symbols from + # imported grammars. + return self.symbols_by_name[symbol_fqn] + except KeyError: + if '.' in symbol_fqn: + import_module_name, name = symbol_fqn.split('.', 1) + try: + imported_pg_file = self.imports[import_module_name] + except KeyError as ex_inner: + raise GrammarError( + location=location, + message=f'Unexisting module "{import_module_name}"' + f' in reference "{symbol_fqn}"') from ex_inner + return imported_pg_file.resolve_symbol_by_name(name, location) + return None + + def resolve_action_by_name(self, action_name: str) -> Optional[Callable]: + """ + Return registered action for the given action's FQN. + """ + if action_name in self.actions: + return self.actions[action_name] + if '.' 
in action_name: + import_module_name, name = action_name.split('.', 1) + if import_module_name in self.imports: + imported_pg_file = self.imports[import_module_name] + return imported_pg_file.resolve_action_by_name(name) + return None + + +class Grammar(PGFile): + """ + Grammar is a collection of production rules, nonterminals and terminals. + First production is reserved for the augmented production (S' -> S). + + Attributes: + start_symbol (GrammarSymbol or str): start/root symbol of the grammar or + its name. + nonterminals (set of NonTerminal): + terminals(set of Terminal): + imported_files(dict): Global registry of all imported files. + + """ + + def __init__(self, productions=None, terminals=None, + classes=None, imports=None, file_path=None, recognizers=None, + start_symbol=None, _no_check_recognizers=False): + """ + Grammar constructor is not meant to be called directly by the user. + See `from_str` and `from_file` static methods instead. + + Arguments: + see Grammar attributes. + _no_check_recognizers (bool, internal): Used by pglr tool to circumvent + errors for empty recognizers that will be provided in user code. + """ + + self.imported_files = {} + + super().__init__(productions=productions, + terminals=terminals, + classes=classes, + imports=imports, + file_path=file_path, + grammar=self, + recognizers=recognizers) + + self._no_check_recognizers = _no_check_recognizers + + # Determine start symbol. If name is provided search for it. If name is + # not given use the first production LHS symbol as the start symbol. + if start_symbol: + if isinstance(start_symbol, str): + for p in self.productions: + if p.symbol.name == start_symbol: + self.start_symbol = p.symbol + else: + self.start_symbol = start_symbol + else: + # By default, first production symbol is the start symbol. 
+ self.start_symbol = self.productions[0].symbol + + self._init_grammar() + + def _init_grammar(self): + """ + Extracts all grammar symbol (nonterminal and terminal) from the + grammar, resolves and check references in productions, unify all + grammar symbol objects and enumerate productions. + """ + # Reserve 0 production. It is used for augmented prod. in LR + # automata calculation. + self.productions.insert( + 0, + Production(AUGSYMBOL, ProductionRHS([self.start_symbol, STOP]))) + + self._add_resolve_all_production_symbols() + self._enumerate_productions() + self._fix_keyword_terminals() + self._resolve_actions() + + # Connect recognizers, override grammar provided + if not self._no_check_recognizers: + self._connect_override_recognizers() + + def _add_resolve_all_production_symbols(self): + """ + Registers all grammar symbols and resolve RHS of each production. + """ + + self.nonterminals = {} + for prod in self.productions: + self.nonterminals[prod.symbol.fqn] = prod.symbol + self.terminals.update([(s.name, s) for s in (EMPTY, STOP)]) + + def add_productions(productions): + for production in productions: + symbol = production.symbol + if symbol.fqn not in self.nonterminals: + self.nonterminals[symbol.fqn] = symbol + for idx, rhs_elem in enumerate(production.rhs): + if isinstance(rhs_elem, Reference): + rhs_elem = production.rhs[idx] = \ + self._resolve_ref(rhs_elem) + if isinstance(rhs_elem, Terminal): + if rhs_elem.fqn not in self.terminals: + self.terminals[rhs_elem.fqn] = rhs_elem + else: + # Unify terminals + production.rhs[idx] = self.terminals[rhs_elem.fqn] + elif isinstance(rhs_elem, NonTerminal): + if rhs_elem.fqn not in self.nonterminals: + # This may happen for RHS refs that create new + # productions (e.g. syntactic sugar extensions - *, + # +...) 
+ self.productions.extend(rhs_elem.productions) + add_productions(rhs_elem.productions) + else: + # This should never happen + raise AssertionError( + f"Invalid RHS element type '{type(rhs_elem)}'.") + add_productions(list(self.productions)) + + def register_symbol(self, symbol): + self.symbols_by_name[symbol.name] = symbol + + def _resolve_ref(self, symbol_ref): """Resolves given symbol reference. For local name search this file, for FQN use imports and delegate to imported file. - On each resolved symbol productions in the root file are updated. - If this is first pass do not fail on unexisting reference as there might be new symbols created during resolving (e.g. multiplicity symbols). """ if isinstance(symbol_ref.separator, Reference): - symbol_ref.separator = self.resolve_ref(symbol_ref.separator) + symbol_ref.separator = self._resolve_ref(symbol_ref.separator) - symbol_name = symbol_ref.name - symbol = self.resolve_symbol_by_name(symbol_name, symbol_ref.location) + symbol_fqn = symbol_ref.fqn + symbol = self.resolve_symbol_by_name(symbol_fqn, symbol_ref.location) if not symbol: - if first_pass: - return symbol_ref - else: - raise GrammarError( - location=symbol_ref.location, - message=f'Unknown symbol "{symbol_name}"') + raise GrammarError( + location=symbol_ref.location, + message=f'Unknown symbol "{symbol_fqn}"') mult = symbol_ref.multiplicity if mult != MULT_ONE: @@ -666,45 +833,19 @@ def resolve_ref(self, symbol_ref, first_pass=False): if symbol_ref.separator else None base_symbol = symbol - symbol_name = symbol_ref.multiplicity_name + symbol_name = symbol_ref.multiplicity_fqn symbol = self.resolve_symbol_by_name(symbol_name, symbol_ref.location) if not symbol: # If there is no multiplicity version of the symbol we # will create one at this place - symbol = self.make_multiplicity_symbol( + symbol = self._make_multiplicity_symbol( symbol_ref, base_symbol, separator, self.imported_with) return symbol - def resolve_symbol_by_name(self, symbol_name, 
location=None): - """ - Resolves symbol by fqn. - """ - if '.' in symbol_name: - import_module_name, name = symbol_name.split('.', 1) - try: - imported_pg_file = self.imports[import_module_name] - except KeyError as ex: - raise GrammarError( - location=location, - message='Unexisting module "{}" in reference "{}"' - .format(import_module_name, symbol_name)) from ex - return imported_pg_file.resolve_symbol_by_name(name, location) - else: - return self.symbols_by_name.get(symbol_name, None) - - def resolve_action_by_name(self, action_name): - if action_name in self.actions: - return self.actions[action_name] - elif '.' in action_name: - import_module_name, name = action_name.split('.', 1) - if import_module_name in self.imports: - imported_pg_file = self.imports[import_module_name] - return imported_pg_file.resolve_action_by_name(name) - - def make_multiplicity_symbol(self, symbol_ref, base_symbol, separator, - imported_with): + def _make_multiplicity_symbol(self, symbol_ref, base_symbol, separator, + imported_with): """ Creates new NonTerminal for symbol refs using multiplicity and separators. 
@@ -712,8 +853,8 @@ def make_multiplicity_symbol(self, symbol_ref, base_symbol, separator, mult = symbol_ref.multiplicity assoc = ASSOC_RIGHT if symbol_ref.greedy else ASSOC_NONE if mult in [MULT_ONE_OR_MORE, MULT_ZERO_OR_MORE]: - symbol_name = make_multiplicity_name( - symbol_ref.name, MULT_ONE_OR_MORE, + symbol_name = make_multiplicity_fqn( + symbol_ref.fqn, MULT_ONE_OR_MORE, separator.name if separator else None) symbol = self.resolve_symbol_by_name(symbol_name) if not symbol: @@ -745,8 +886,8 @@ def make_multiplicity_symbol(self, symbol_ref, base_symbol, separator, if mult == MULT_ZERO_OR_MORE: productions = [] symbol_one = symbol - symbol_name = make_multiplicity_name( - symbol_ref.name, mult, + symbol_name = make_multiplicity_fqn( + symbol_ref.fqn, mult, separator.name if separator else None) symbol = NonTerminal(symbol_name, productions, base_symbol.location, @@ -763,8 +904,7 @@ def make_multiplicity_symbol(self, symbol_ref, base_symbol, separator, def action(_, nodes): if nodes: return nodes[0] - else: - return [] + return [] symbol.grammar_action = action @@ -791,7 +931,7 @@ def action(_, nodes): message='Repetition modifier not allowed for ' f'optional (?) for symbol "{symbol_ref.name}".') productions = [] - symbol_name = make_multiplicity_name(symbol_ref.name, mult) + symbol_name = make_multiplicity_fqn(symbol_ref.fqn, mult) symbol = NonTerminal(symbol_name, productions, base_symbol.location, imported_with=imported_with) @@ -807,114 +947,6 @@ def action(_, nodes): return symbol - -class Grammar(PGFile): - """ - Grammar is a collection of production rules, nonterminals and terminals. - First production is reserved for the augmented production (S' -> S). - - Attributes: - start_symbol (GrammarSymbol or str): start/root symbol of the grammar or - its name. - nonterminals (set of NonTerminal): - terminals(set of Terminal): - imported_files(dict): Global registry of all imported files. 
- - """ - - def __init__(self, productions=None, terminals=None, - classes=None, imports=None, file_path=None, recognizers=None, - start_symbol=None, _no_check_recognizers=False): - """ - Grammar constructor is not meant to be called directly by the user. - See `from_str` and `from_file` static methods instead. - - Arguments: - see Grammar attributes. - _no_check_recognizers (bool, internal): Used by pglr tool to circumvent - errors for empty recognizers that will be provided in user code. - """ - - self.imported_files = {} - - super().__init__(productions=productions, - terminals=terminals, - classes=classes, - imports=imports, - file_path=file_path, - grammar=self, - recognizers=recognizers) - - self._no_check_recognizers = _no_check_recognizers - - # Determine start symbol. If name is provided search for it. If name is - # not given use the first production LHS symbol as the start symbol. - if start_symbol: - if isinstance(start_symbol, str): - for p in self.productions: - if p.symbol.name == start_symbol: - self.start_symbol = p.symbol - else: - self.start_symbol = start_symbol - else: - # By default, first production symbol is the start symbol. - self.start_symbol = self.productions[0].symbol - - self._init_grammar() - - def _init_grammar(self): - """ - Extracts all grammar symbol (nonterminal and terminal) from the - grammar, resolves and check references in productions, unify all - grammar symbol objects and enumerate productions. - """ - # Reserve 0 production. It is used for augmented prod. in LR - # automata calculation. 
- self.productions.insert( - 0, - Production(AUGSYMBOL, ProductionRHS([self.start_symbol, STOP]))) - - self._add_all_production_symbols() - self._enumerate_productions() - self._fix_keyword_terminals() - self._resolve_actions() - - # Connect recognizers, override grammar provided - if not self._no_check_recognizers: - self._connect_override_recognizers() - - def _add_all_production_symbols(self): - - self.nonterminals = {} - for prod in self.productions: - self.nonterminals[prod.symbol.fqn] = prod.symbol - self.terminals.update([(s.name, s) for s in (EMPTY, STOP)]) - - def add_productions(productions): - for production in productions: - symbol = production.symbol - if symbol.fqn not in self.nonterminals: - self.nonterminals[symbol.fqn] = symbol - for idx, rhs_elem in enumerate(production.rhs): - if isinstance(rhs_elem, Terminal): - if rhs_elem.fqn not in self.terminals: - self.terminals[rhs_elem.fqn] = rhs_elem - else: - # Unify terminals - production.rhs[idx] = self.terminals[rhs_elem.fqn] - elif isinstance(rhs_elem, NonTerminal): - if rhs_elem.fqn not in self.nonterminals: - # This may happen for RHS refs that create new - # productions (e.g. syntactic sugar extensions - *, - # +...) 
- self.productions.extend(rhs_elem.productions) - add_productions(rhs_elem.productions) - else: - # This should never happen - raise AssertionError( - f"Invalid RHS element type '{type(rhs_elem)}'.") - add_productions(list(self.productions)) - def _enumerate_productions(self): """ Enumerates all productions (prod_id) and production per symbol @@ -1234,19 +1266,21 @@ def create_productions_terminals(productions): if t not in inline_terminals: inline_terminals[t] = \ Terminal(recognizer=StringRecognizer(t), name=t) - rhs[idx] = Reference(location=None, name=t) + rhs[idx] = Reference(location=None, name=t, + imported_with=symbol.imported_with) elif isinstance(t, Terminal): if t.name not in inline_terminals: inline_terminals[t.name] = t - rhs[idx] = Reference(location=None, name=t.name) + rhs[idx] = Reference(location=None, name=t.name, + imported_with=symbol.imported_with) gp.append(Production(symbol, rhs, assoc=assoc, prior=prior)) return gp, list(inline_terminals.values()) -def make_multiplicity_name(symbol_name, multiplicity=None, - separator_name=None): +def make_multiplicity_fqn(symbol_name, multiplicity=None, + separator_name=None): if multiplicity is None or multiplicity == MULT_ONE: return symbol_name name_by_mult = { @@ -1269,10 +1303,6 @@ def check_name(context, name): raise GrammarError( location=Location(context), message=f'Rule name "{name}" is reserved.') - if '.' in name: - raise GrammarError( - location=Location(context), - message=f'Using dot in names is not allowed ("{name}").') # Grammar for grammars @@ -1749,7 +1779,7 @@ def act_production_group(context, nodes): # Group name will be known when the grammar rule is # reduced so store these production for later. 
productions = nodes[1] - reference = Reference(Location(context), 'resolving') + reference = Reference(Location(context), 'resolving', context.extra.imported_with) context.extra.groups.append((reference, productions)) return reference @@ -1849,7 +1879,8 @@ def act_gsymbol_reference(context, nodes): sep_ref = None if modifiers: sep_ref = modifiers[1] - sep_ref = Reference(Location(context), sep_ref) + sep_ref = Reference(Location(context), sep_ref, + context.extra.imported_with) symbol_ref.separator = sep_ref if rep_op.startswith('*'): @@ -1868,7 +1899,8 @@ def act_gsymbol_reference(context, nodes): def act_gsymbol_string_recognizer(context, nodes): recognizer = act_recognizer_str(context, nodes) - terminal_ref = Reference(Location(context), recognizer.name) + terminal_ref = Reference(Location(context), recognizer.name, + context.extra.imported_with) if terminal_ref.name not in context.extra.inline_terminals: check_name(context, terminal_ref.name) @@ -1944,7 +1976,8 @@ def act_regex_term(context, value): 'GrammarSymbolReference': act_gsymbol_reference, 'GrammarSymbol': [lambda context, nodes: Reference(Location(context), - nodes[0]), + nodes[0], + context.extra.imported_with), act_gsymbol_string_recognizer], 'Recognizer': [act_recognizer_str, act_recognizer_regex], diff --git a/parglare/parser.py b/parglare/parser.py index 79159d5..57f5b64 100644 --- a/parglare/parser.py +++ b/parglare/parser.py @@ -784,7 +784,7 @@ def _do_recovery(self): if successful: if debug: h_print("Recovery ") - error.location.context.end_position = head.position + error.location.end_position = head.position if debug: a_print("New position is ", pos_to_line_col(head.input_str, head.position), diff --git a/tests/func/import/override/base.pg b/tests/func/import/override/base.pg new file mode 100644 index 0000000..1460eec --- /dev/null +++ b/tests/func/import/override/base.pg @@ -0,0 +1,11 @@ +import 'first.pg' as f; + +S: f.s.A; + +// This rule overrides imported rule C from the second grammar 
+// Each rule that references the old rule C should now +// reference the overridden rule. +f.s.C: 'k' f.s.B; + +terminals +f.s.B: 'bb'; // also all rules referencing terminal B now use the overridden version diff --git a/tests/func/import/override/first.pg b/tests/func/import/override/first.pg new file mode 100644 index 0000000..9b313e2 --- /dev/null +++ b/tests/func/import/override/first.pg @@ -0,0 +1,6 @@ +import 'second.pg' as s; +S: s.A s.C; + +terminals +// This grammar overrides terminal match; +s.B: 'bf'; diff --git a/tests/func/import/override/nonexisting.pg b/tests/func/import/override/nonexisting.pg new file mode 100644 index 0000000..dbf4e02 --- /dev/null +++ b/tests/func/import/override/nonexisting.pg @@ -0,0 +1,5 @@ +import 'first.pg' as f; + +S: f.S; + +f.NonExisting: f.s.B; diff --git a/tests/func/import/override/second.pg b/tests/func/import/override/second.pg new file mode 100644 index 0000000..3ad77eb --- /dev/null +++ b/tests/func/import/override/second.pg @@ -0,0 +1,5 @@ +A: B+ C; +C: 'sec' B; + +terminals +B: 'bs'; diff --git a/tests/func/import/override/test_override.py b/tests/func/import/override/test_override.py new file mode 100644 index 0000000..1a253c0 --- /dev/null +++ b/tests/func/import/override/test_override.py @@ -0,0 +1,39 @@ +import os + +import pytest + +from parglare import Grammar, GrammarError, Parser + +this_folder = os.path.dirname(__file__) + + +def test_override_base(): + """ + Test overrides with two levels of nesting. + """ + g = Grammar.from_file(os.path.join(this_folder, 'base.pg')) + p = Parser(g) + result = p.parse('bb bb k bb') + assert result + + +def test_override_first(): + """ + Loading grammar from the lower level of import hierarchy works correctly + also. + """ + g = Grammar.from_file(os.path.join(this_folder, 'first.pg')) + p = Parser(g) + result = p.parse('bf bf sec bf sec bf') + assert result + + +def test_override_nonexisting_symbol(): + """ + Test override that doesn't exist.
 By default it could go unnoticed and + the intended rule would not be overridden. This verifies that typo errors + would not go unnoticed. + """ + with pytest.raises(GrammarError, + match='Unexisting name for symbol override f.NonExisting'): + Grammar.from_file(os.path.join(this_folder, 'nonexisting.pg'))