Skip to content

Commit

Permalink
Include no-meaning operators in JSON op extraction (#85)
Browse files Browse the repository at this point in the history
Include no-meaning operators in JSON op extraction, separated by affix: Infix, Postfix, or Prefix.

Also, update "meaningful" field comment at the top of the YAML file.

Add a unit test for operator-table consistency.
  • Loading branch information
rocky authored Nov 20, 2024
1 parent 051cb0c commit ad488f8
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 11 deletions.
8 changes: 5 additions & 3 deletions mathics_scanner/data/operators.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
# - NullAry (0 arguments),
# - Unary (1 argument),
# - Binary (2 arguments)
# - Infix (2 or more arguments; Binary is a special case of Infix having exactly 2 arguments)
# - Ternary (3 arguments)
# - n-ary (n arguments)
#
Expand Down Expand Up @@ -82,8 +83,9 @@
# - Left
# - Missing["Unknown"]
#
# meaningful: boolean ??

# meaningful: "true" if WMA defines a meaning for the operator and "false" if not.
# See "Operators without Built-in Meanings"
# https://reference.wolfram.com/language/tutorial/TextualInputAndOutput.html#41

AddTo:
actual-precedence: 120
Expand Down Expand Up @@ -6609,7 +6611,7 @@ Star:
# N-tokens: {}
# L-tokens: {"⋆"}
# O-tokens: {}
# usage: "expr1 ⋆ expr2"
# usage: "expr1 ⋆ expr2 ⋆ expr3"
# parse: {"Star", "[", "expr1", ",", "expr2", "]"}
FullForm: Star[expr1, expr2]
arity: Binary
Expand Down
44 changes: 36 additions & 8 deletions mathics_scanner/generate/build_operator_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,46 @@ def read(*rnames) -> str:
return open(osp.join(get_srcdir(), *rnames)).read()


def compile_tables(
    operator_data: Dict[str, dict], character_data: Dict[str, dict]
) -> Dict[str, dict]:
    """
    Compiles the general table into the tables used internally by the library.
    This facilitates fast access of this information by clients needing this
    information.

    ``operator_data`` maps an operator name to its attribute dictionary; each
    entry must have a "precedence" key and may have "meaningful" and "affix"
    keys.  ``character_data`` maps a name to a dictionary that may carry
    "unicode-equivalent" and/or "wl-unicode" keys.

    Returns a dictionary with four tables:

    - "operator-precedence": operator name -> precedence
    - "no-meaning-infix-operators",
      "no-meaning-postfix-operators",
      "no-meaning-prefix-operators": operator name -> character, for
      operators whose "meaningful" field is ``False`` and which have an
      entry of the same name in ``character_data``.

    Entries that cannot be classified are reported with a "FIXME" message on
    standard output and left out of the no-meaning tables.
    """
    operator_precedence = {}

    no_meaning_infix_operators = {}
    no_meaning_prefix_operators = {}
    no_meaning_postfix_operators = {}

    for operator_name, operator_info in operator_data.items():
        operator_precedence[operator_name] = operator_info["precedence"]

        # Only operators explicitly flagged as having no built-in meaning
        # are candidates for the no-meaning tables.
        if operator_info.get("meaningful", True) is not False:
            continue

        character_info = character_data.get(operator_name)
        if not character_info:
            continue

        # Prefer the standard Unicode equivalent; fall back to the
        # "wl-unicode" code point when there is none.
        unicode_char = character_info.get("unicode-equivalent")
        if unicode_char is None:
            unicode_char = character_info.get("wl-unicode")
        if unicode_char is None:
            print(f"FIXME: no unicode or WMA equivalent for {operator_name}")
            continue

        # Use .get() so that an entry without an "affix" key is reported
        # like any other unhandled affix instead of aborting the whole
        # table build with a KeyError.
        affix = operator_info.get("affix")
        if affix == "Infix":
            no_meaning_infix_operators[operator_name] = unicode_char
        elif affix == "Postfix":
            no_meaning_postfix_operators[operator_name] = unicode_char
        elif affix == "Prefix":
            no_meaning_prefix_operators[operator_name] = unicode_char
        else:
            print(f"FIXME: affix {affix} of {operator_name} not handled")

    return {
        "operator-precedence": operator_precedence,
        "no-meaning-infix-operators": no_meaning_infix_operators,
        "no-meaning-postfix-operators": no_meaning_postfix_operators,
        "no-meaning-prefix-operators": no_meaning_prefix_operators,
    }


Expand All @@ -67,20 +94,21 @@ def compile_tables(data: Dict[str, dict]) -> Dict[str, dict]:
"-o",
show_default=True,
type=click.Path(writable=True),
default=DEFAULT_DATA_DIR / "operators-next.json",
default=DEFAULT_DATA_DIR / "operators.json",
)
@click.argument(
"data_dir", type=click.Path(readable=True), default=DEFAULT_DATA_DIR, required=False
)
def main(output, data_dir):
with open(data_dir / "operators.yml", "r", encoding="utf8") as i, open(
output, "w"
) as o:
with open(data_dir / "operators.yml", "r", encoding="utf8") as operator_f, open(
data_dir / "named-characters.yml", "r", encoding="utf8"
) as character_f, open(output, "w") as o:
# Load the YAML data.
data = yaml.load(i, Loader=yaml.FullLoader)
operator_data = yaml.load(operator_f, Loader=yaml.FullLoader)
character_data = yaml.load(character_f, Loader=yaml.FullLoader)

# Precompile the tables.
data = compile_tables(data)
data = compile_tables(operator_data, character_data)

# Dump the preprocessed dictionaries to disk as JSON.
json.dump(data, o)
Expand Down
104 changes: 104 additions & 0 deletions test/test_operators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-

import os.path as osp
from pathlib import Path

import yaml

# Directory holding the YAML tables, located relative to this test file
# (../mathics_scanner/data).
data_dir = Path(osp.normpath(osp.dirname(__file__)), "..", "mathics_scanner", "data")
# Both tables are read once, at import time, and shared by the tests below.
with open(data_dir / "operators.yml", "r", encoding="utf8") as operator_f, open(
    data_dir / "named-characters.yml", "r", encoding="utf8"
) as character_f:
    # Load the YAML data.
    operator_data = yaml.load(operator_f, Loader=yaml.FullLoader)
    character_data = yaml.load(character_f, Loader=yaml.FullLoader)


def test_operators():
    """
    Check that every "operator-name" that was collected is accounted for by a
    key of the operator table, after pairing up Left.../Right... names.
    """
    # The "operator-name" key is used instead of the YAML "name" key
    # because of situations like "FunctionAmpersand", which is the same as
    # "Function", but "Function" is already needed/used as a YAML key.
    # Apply3Ats (MapApply) is another example.
    # NOTE(review): the names are gathered from operator_data even though the
    # variable name and the final assertion message talk about the character
    # table -- confirm whether character_data was intended here.
    character_operator_names = {
        entry["operator-name"]
        for entry in operator_data.values()
        if "operator-name" in entry
    }
    operator_names = set(operator_data)

    # Split out the bracket-like names by their "Left"/"Right" prefix.
    left_character_operators = set()
    right_character_operators = set()
    for name in character_operator_names:
        if name.startswith("Left"):
            left_character_operators.add(name)
        if name.startswith("Right"):
            right_character_operators.add(name)

    # For each "Left" name that is not itself an operator-table key, check
    # that there is a corresponding "Right" name and that the name without
    # the "Left" prefix appears in the operator table.
    left_operator_remove = set()
    for left_name in left_character_operators:
        if left_name not in operator_names:
            base_name = left_name[len("Left") :]
            assert "Right" + base_name in right_character_operators
            assert base_name in operator_names
            left_operator_remove.add(left_name)

    # Same check for the "Right" names.  Here the matched name is dropped
    # from character_operator_names right away, and the un-prefixed name is
    # dropped from operator_names.
    right_operator_remove = set()
    for right_name in right_character_operators:
        if right_name in operator_names:
            continue
        base_name = right_name[len("Right") :]
        assert "Left" + base_name in left_character_operators
        character_operator_names.remove(right_name)
        assert base_name in operator_names
        operator_names.remove(base_name)
        right_operator_remove.add(right_name)

    character_operator_names.difference_update(left_operator_remove)
    character_operator_names.difference_update(right_operator_remove)

    # For some reason we decided to exclude "Prefix" as a character operator.
    # Add it back in here.
    character_operator_names.add("Prefix")

    # FIXME: go over tables to make the below work
    # extra_operator_names = operator_names - character_operator_names
    # assert not extra_operator_names, f"Should not have extra operators in YAML operator table {extra_operator_names}"

    extra_character_operators = character_operator_names - operator_names
    assert (
        not extra_character_operators
    ), f"Should not have extra operators in JSON character table {extra_character_operators}"


def test_meaningful_affix():
    """
    Check that all operators where the "meaningful" field is "false" have a valid affix value.

    Only operators that also have an entry of the same name in the
    named-character table are examined.  Each of these must provide either a
    "unicode-equivalent" or a "wl-unicode" value, and its "affix" must be one
    of "Infix", "Postfix" or "Prefix".
    """
    for operator_name, operator_info in operator_data.items():
        if operator_info.get("meaningful", True) is not False:
            continue
        character_info = character_data.get(operator_name)
        if not character_info:
            continue

        if character_info.get("unicode-equivalent") is None:
            assert (
                character_info.get("wl-unicode") is not None
            ), f"no unicode or WMA equivalent for {operator_name}"

        # The affix is checked for every such operator, including those that
        # only have a "wl-unicode" value.  .get() is used so that a missing
        # "affix" key is reported through the assertion message below rather
        # than as a bare KeyError.
        affix = operator_info.get("affix")
        assert affix in (
            "Infix",
            "Postfix",
            "Prefix",
        ), f"affix {affix} of {operator_name} not handled"

0 comments on commit ad488f8

Please sign in to comment.