Include no-meaning operators in JSON op extraction (#85)

Include no-meaning operators in JSON op extraction, separated by affix: Infix, Postfix, or Prefix. Also, update the "meaningful" field comment at the top of the YAML file. Add a unit test for operator-table consistency.
Mathics3 · Nov 20, 2024 · ad488f8 · ad488f8
1 parent 051cb0c
commit ad488f8
Show file tree

Hide file tree

Showing 3 changed files with 145 additions and 11 deletions.
diff --git a/mathics_scanner/data/operators.yml b/mathics_scanner/data/operators.yml
@@ -43,6 +43,7 @@
 #  - NullAry (0 arguments),
 #  - Unary (1 argument),
 #  - Binary (2 arguments)
+#  - Infix (2 or more arguments; Binary is a special case of Infix having exactly 2 arguments)
 #  - Ternary (3 arguments)
 #  - n-ary (n arguments)
 #
@@ -82,8 +83,9 @@
 #       - Left
 #       - Missing["Unknown"]
 #
-#   meaningful: boolean ??
-
+#   meaningful: "true" if WMA defines a meaning for the operator and "false" if not.
+#               See "Operators without Built-in Meanings"
+#               https://reference.wolfram.com/language/tutorial/TextualInputAndOutput.html#41
 
 AddTo:
   actual-precedence: 120
@@ -6609,7 +6611,7 @@ Star:
   # N-tokens: {}
   # L-tokens: {"⋆"}
   # O-tokens: {}
-  # usage: "expr1 ⋆ expr2"
+  # usage: "expr1 ⋆ expr2 ⋆ expr3"
   # parse: {"Star", "[", "expr1", ",", "expr2", "]"}
   FullForm: Star[expr1, expr2]
   arity: Binary

diff --git a/mathics_scanner/generate/build_operator_tables.py b/mathics_scanner/generate/build_operator_tables.py
@@ -41,19 +41,46 @@ def read(*rnames) -> str:
     return open(osp.join(get_srcdir(), *rnames)).read()
 
 
-def compile_tables(data: Dict[str, dict]) -> Dict[str, dict]:
+def compile_tables(
+    operator_data: Dict[str, dict], character_data: Dict[str, dict]
+) -> Dict[str, dict]:
     """
     Compiles the general table into the tables used internally by the library.
     This facilitates fast access of this information by clients needing this
     information.
     """
     operator_precedence = {}
 
-    for k, v in data.items():
+    for k, v in operator_data.items():
         operator_precedence[k] = v["precedence"]
 
+    no_meaning_infix_operators = {}
+    no_meaning_prefix_operators = {}
+    no_meaning_postfix_operators = {}
+
+    for operator_name, operator_info in operator_data.items():
+        if operator_info.get("meaningful", True) is False and (
+            character_info := character_data.get(operator_name)
+        ):
+            if (unicode_char := character_info.get("unicode-equivalent")) is None:
+                if (unicode_char := character_info.get("wl-unicode")) is None:
+                    print(f"FIXME: no unicode or WMA equivalent for {operator_name}")
+                continue
+
+            affix = operator_info["affix"]
+            if affix == "Infix":
+                no_meaning_infix_operators[operator_name] = unicode_char
+            elif affix == "Postfix":
+                no_meaning_postfix_operators[operator_name] = unicode_char
+            elif affix == "Prefix":
+                no_meaning_prefix_operators[operator_name] = unicode_char
+            else:
+                print(f"FIXME: affix {affix} of {operator_name} not handled")
     return {
         "operator-precedence": operator_precedence,
+        "no-meaning-infix-operators": no_meaning_infix_operators,
+        "no-meaning-postfix-operators": no_meaning_postfix_operators,
+        "no-meaning-prefix-operators": no_meaning_prefix_operators,
     }
 
 
@@ -67,20 +94,21 @@ def compile_tables(data: Dict[str, dict]) -> Dict[str, dict]:
     "-o",
     show_default=True,
     type=click.Path(writable=True),
-    default=DEFAULT_DATA_DIR / "operators-next.json",
+    default=DEFAULT_DATA_DIR / "operators.json",
 )
 @click.argument(
     "data_dir", type=click.Path(readable=True), default=DEFAULT_DATA_DIR, required=False
 )
 def main(output, data_dir):
-    with open(data_dir / "operators.yml", "r", encoding="utf8") as i, open(
-        output, "w"
-    ) as o:
+    with open(data_dir / "operators.yml", "r", encoding="utf8") as operator_f, open(
+        data_dir / "named-characters.yml", "r", encoding="utf8"
+    ) as character_f, open(output, "w") as o:
         # Load the YAML data.
-        data = yaml.load(i, Loader=yaml.FullLoader)
+        operator_data = yaml.load(operator_f, Loader=yaml.FullLoader)
+        character_data = yaml.load(character_f, Loader=yaml.FullLoader)
 
         # Precompile the tables.
-        data = compile_tables(data)
+        data = compile_tables(operator_data, character_data)
 
         # Dump the preprocessed dictionaries to disk as JSON.
         json.dump(data, o)

diff --git a/test/test_operators.py b/test/test_operators.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+
+import os.path as osp
+from pathlib import Path
+
+import yaml
+
+data_dir = Path(osp.normpath(osp.dirname(__file__)), "..", "mathics_scanner", "data")
+with open(data_dir / "operators.yml", "r", encoding="utf8") as operator_f, open(
+    data_dir / "named-characters.yml", "r", encoding="utf8"
+) as character_f:
+    # Load the YAML data.
+    operator_data = yaml.load(operator_f, Loader=yaml.FullLoader)
+    character_data = yaml.load(character_f, Loader=yaml.FullLoader)
+
+
+def test_operators():
+    # We need to use "operator-name" instead of YAML "name" key
+    # because of situations like "FunctionAmpersand"
+    # which is the same as "Function", but "Function" is already
+    # needed/used as a YAML key. Apply3Ats (MapApply) is another
+    # example.
+    character_operator_names = set(
+        [
+            value["operator-name"]
+            for value in operator_data.values()
+            if "operator-name" in value
+        ]
+    )
+    operator_names = set(tuple(operator_data.keys()))
+
+    left_character_operators = {
+        operator_name
+        for operator_name in character_operator_names
+        if operator_name.startswith("Left")
+    }
+    right_character_operators = {
+        operator_name
+        for operator_name in character_operator_names
+        if operator_name.startswith("Right")
+    }
+
+    # For "Left" operators listed in name characters, check that there is a corresponding "Right"
+    # and check that the name without "Left" or "Right" appears in the operator table.
+    left_operator_remove = set()
+    for left_operator in left_character_operators:
+        if left_operator in operator_names:
+            continue
+        operator_name = left_operator[len("Left") :]
+        right_operator = "Right" + operator_name
+        assert right_operator in right_character_operators
+        assert operator_name in operator_names
+        # print(f"WOOT short found: {operator_name}")
+        left_operator_remove.add(left_operator)
+
+    right_operator_remove = set()
+    for right_operator in right_character_operators:
+        if right_operator in operator_names:
+            continue
+        operator_name = right_operator[len("Right") :]
+        left_operator = "Left" + operator_name
+        assert left_operator in left_character_operators
+        character_operator_names.remove(right_operator)
+        assert operator_name in operator_names
+        operator_names.remove(operator_name)
+        right_operator_remove.add(right_operator)
+
+    character_operator_names -= left_operator_remove
+    character_operator_names -= right_operator_remove
+
+    # For some reason we decided to exclude "Prefix" as a character operator. Add it back in here
+    character_operator_names.add("Prefix")
+
+    extra_character_operators = character_operator_names - operator_names
+
+    # FIXME: go over tables to make the below work
+    # extra_operator_names = operator_names - character_operator_names
+    # assert not extra_operator_names, f"Should not have extra operators in YAML operator table {extra_operator_names}"
+
+    assert (
+        not extra_character_operators
+    ), f"Should not have extra operators in JSON character table {extra_character_operators}"
+
+
+def test_meaningful_affix():
+    """
+    Check that all operators where the "meaningful" field is "false" have a valid affix value.
+    """
+    for operator_name, operator_info in operator_data.items():
+        if operator_info.get("meaningful", True) is False and (
+            character_info := character_data.get(operator_name)
+        ):
+            if (character_info.get("unicode-equivalent")) is None:
+                assert (
+                    character_info.get("wl-unicode") is not None
+                ), f"no unicode or WMA equivalent for {operator_name}"
+                continue
+
+            affix = operator_info["affix"]
+            assert affix in (
+                "Infix",
+                "Postfix",
+                "Prefix",
+            ), f"affix {affix} of {operator_name} not handled"