From ad488f84d41bf3a489e983a0ae898a58e0f9b1c5 Mon Sep 17 00:00:00 2001 From: "R. Bernstein" Date: Tue, 19 Nov 2024 19:59:40 -0500 Subject: [PATCH] Include no-meaning operators in JSON op extraction (#85) Include no-meaning operators in JSON op extraction, separated by affix: Infix, Postfix, or Prefix. Also, update "meaningful" field comment at the top of the YAML file. Add a unit test for operator-table consistency. --- mathics_scanner/data/operators.yml | 8 +- .../generate/build_operator_tables.py | 44 ++++++-- test/test_operators.py | 104 ++++++++++++++++++ 3 files changed, 145 insertions(+), 11 deletions(-) create mode 100644 test/test_operators.py diff --git a/mathics_scanner/data/operators.yml b/mathics_scanner/data/operators.yml index 8bc239b..c423a61 100644 --- a/mathics_scanner/data/operators.yml +++ b/mathics_scanner/data/operators.yml @@ -43,6 +43,7 @@ # - NullAry (0 arguments), # - Unary (1 argument), # - Binary (2 arguments) +# - Infix (2 or more arguments; Binary is a special case of Infix having exactly 2 arguments) # - Ternary (3 arguments) # - n-ary (n arguments) # @@ -82,8 +83,9 @@ # - Left # - Missing["Unknown"] # -# meaningful: boolean ?? - +# meaningful: "true" if WMA defines a meaning for the operator and "false" if not. 
+# See "Operators without Built-in Meanings" +# https://reference.wolfram.com/language/tutorial/TextualInputAndOutput.html#41 AddTo: actual-precedence: 120 @@ -6609,7 +6611,7 @@ Star: # N-tokens: {} # L-tokens: {"⋆"} # O-tokens: {} - # usage: "expr1 ⋆ expr2" + # usage: "expr1 ⋆ expr2 ⋆ expr3" # parse: {"Star", "[", "expr1", ",", "expr2", "]"} FullForm: Star[expr1, expr2] arity: Binary diff --git a/mathics_scanner/generate/build_operator_tables.py b/mathics_scanner/generate/build_operator_tables.py index bfafe8f..6894f16 100644 --- a/mathics_scanner/generate/build_operator_tables.py +++ b/mathics_scanner/generate/build_operator_tables.py @@ -41,7 +41,9 @@ def read(*rnames) -> str: return open(osp.join(get_srcdir(), *rnames)).read() -def compile_tables(data: Dict[str, dict]) -> Dict[str, dict]: +def compile_tables( + operator_data: Dict[str, dict], character_data: Dict[str, dict] +) -> Dict[str, dict]: """ Compiles the general table into the tables used internally by the library. This facilitates fast access of this information by clients needing this @@ -49,11 +51,36 @@ def compile_tables(data: Dict[str, dict]) -> Dict[str, dict]: """ operator_precedence = {} - for k, v in data.items(): + for k, v in operator_data.items(): operator_precedence[k] = v["precedence"] + no_meaning_infix_operators = {} + no_meaning_prefix_operators = {} + no_meaning_postfix_operators = {} + + for operator_name, operator_info in operator_data.items(): + if operator_info.get("meaningful", True) is False and ( + character_info := character_data.get(operator_name) + ): + if (unicode_char := character_info.get("unicode-equivalent")) is None: + if (unicode_char := character_info.get("wl-unicode")) is None: + print(f"FIXME: no unicode or WMA equivalent for {operator_name}") + continue + + affix = operator_info["affix"] + if affix == "Infix": + no_meaning_infix_operators[operator_name] = unicode_char + elif affix == "Postfix": + no_meaning_postfix_operators[operator_name] = unicode_char + elif 
affix == "Prefix": + no_meaning_prefix_operators[operator_name] = unicode_char + else: + print(f"FIXME: affix {affix} of {operator_name} not handled") return { "operator-precedence": operator_precedence, + "no-meaning-infix-operators": no_meaning_infix_operators, + "no-meaning-postfix-operators": no_meaning_postfix_operators, + "no-meaning-prefix-operators": no_meaning_prefix_operators, } @@ -67,20 +94,21 @@ def compile_tables(data: Dict[str, dict]) -> Dict[str, dict]: "-o", show_default=True, type=click.Path(writable=True), - default=DEFAULT_DATA_DIR / "operators-next.json", + default=DEFAULT_DATA_DIR / "operators.json", ) @click.argument( "data_dir", type=click.Path(readable=True), default=DEFAULT_DATA_DIR, required=False ) def main(output, data_dir): - with open(data_dir / "operators.yml", "r", encoding="utf8") as i, open( - output, "w" - ) as o: + with open(data_dir / "operators.yml", "r", encoding="utf8") as operator_f, open( + data_dir / "named-characters.yml", "r", encoding="utf8" + ) as character_f, open(output, "w") as o: # Load the YAML data. - data = yaml.load(i, Loader=yaml.FullLoader) + operator_data = yaml.load(operator_f, Loader=yaml.FullLoader) + character_data = yaml.load(character_f, Loader=yaml.FullLoader) # Precompile the tables. - data = compile_tables(data) + data = compile_tables(operator_data, character_data) # Dump the preprocessed dictionaries to disk as JSON. json.dump(data, o) diff --git a/test/test_operators.py b/test/test_operators.py new file mode 100644 index 0000000..580bbfe --- /dev/null +++ b/test/test_operators.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- + +import os.path as osp +from pathlib import Path + +import yaml + +data_dir = Path(osp.normpath(osp.dirname(__file__)), "..", "mathics_scanner", "data") +with open(data_dir / "operators.yml", "r", encoding="utf8") as operator_f, open( + data_dir / "named-characters.yml", "r", encoding="utf8" +) as character_f: + # Load the YAML data. 
+ operator_data = yaml.load(operator_f, Loader=yaml.FullLoader) + character_data = yaml.load(character_f, Loader=yaml.FullLoader) + + +def test_operators(): + # We need to use "operator-name" instead of YAML "name" key + # because of situations like "FunctionAmpersand" + # which is the same as "Function", but "Function" is already + # needed/used as a YAML key. Apply3Ats (MapApply) is another + # example. + character_operator_names = set( + [ + value["operator-name"] + for value in operator_data.values() + if "operator-name" in value + ] + ) + operator_names = set(tuple(operator_data.keys())) + + left_character_operators = { + operator_name + for operator_name in character_operator_names + if operator_name.startswith("Left") + } + right_character_operators = { + operator_name + for operator_name in character_operator_names + if operator_name.startswith("Right") + } + + # For "Left" operators listed in name characters, check that there is a corresponding "Right" + # and check that the name without "Left" or "Right" appears in the operator table. 
+ left_operator_remove = set() + for left_operator in left_character_operators: + if left_operator in operator_names: + continue + operator_name = left_operator[len("Left") :] + right_operator = "Right" + operator_name + assert right_operator in right_character_operators + assert operator_name in operator_names + # print(f"WOOT short found: {operator_name}") + left_operator_remove.add(left_operator) + + right_operator_remove = set() + for right_operator in right_character_operators: + if right_operator in operator_names: + continue + operator_name = right_operator[len("Right") :] + left_operator = "Left" + operator_name + assert left_operator in left_character_operators + character_operator_names.remove(right_operator) + assert operator_name in operator_names + operator_names.remove(operator_name) + right_operator_remove.add(right_operator) + + character_operator_names -= left_operator_remove + character_operator_names -= right_operator_remove + + # For some reason we decided to exclude "Prefix" as a character operator. Add it back in here + character_operator_names.add("Prefix") + + extra_character_operators = character_operator_names - operator_names + + # FIXME: go over tables to make the below work + # extra_operator_names = operator_names - character_operator_names + # assert not extra_operator_names, f"Should not have extra operators in YAML operator table {extra_operator_names}" + + assert ( + not extra_character_operators + ), f"Should not have extra operators in JSON character table {extra_character_operators}" + + +def test_meaningful_affix(): + """ + Check that all operators where the "meaningful" field is "false" have a valid affix value. 
+ """ + for operator_name, operator_info in operator_data.items(): + if operator_info.get("meaningful", True) is False and ( + character_info := character_data.get(operator_name) + ): + if (character_info.get("unicode-equivalent")) is None: + assert ( + character_info.get("wl-unicode") is not None + ), f"no unicode or WMA equivalent for {operator_name}" + continue + + affix = operator_info["affix"] + assert affix in ( + "Infix", + "Postfix", + "Prefix", + ), f"affix {affix} of {operator_name} not handled"