Skip to content

Commit

Permalink
Start deeper tokenizer testing
Browse files Browse the repository at this point in the history
  • Loading branch information
rocky committed Oct 29, 2024
1 parent ac7228d commit e367ac5
Showing 1 changed file with 92 additions and 51 deletions.
143 changes: 92 additions & 51 deletions test/test_tokeniser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,88 +3,102 @@
Tests translation from strings to sequences of tokens.
"""

import pytest
import random
import sys
from typing import List

import pytest

from mathics_scanner.tokeniser import Tokeniser, Token, is_symbol_name
from mathics_scanner.errors import ScanError, IncompleteSyntaxError, InvalidSyntaxError
from mathics_scanner.errors import IncompleteSyntaxError, InvalidSyntaxError, ScanError
from mathics_scanner.feed import SingleLineFeeder
from mathics_scanner.tokeniser import Token, Tokeniser, is_symbol_name


def check_number(mathics3_code: str):
    """Tokenise ``mathics3_code`` and check it scans as a single Number token."""
    token = single_token(mathics3_code)
    # Compare for equality: the original ``assert token, Token(...)`` only
    # tested truthiness of ``token``, using the Token as the failure message.
    assert token == Token("Number", mathics3_code, 0)


def check_symbol(mathics3_code: str):
    """Tokenise ``mathics3_code`` and check it scans as a single Symbol token."""
    token = single_token(mathics3_code)
    # Compare for equality: the original ``assert token, Token(...)`` only
    # tested truthiness of ``token``, using the Token as the failure message.
    assert token == Token("Symbol", mathics3_code, 0)


def check_string(mathics3_code: str):
    """Tokenise ``mathics3_code`` and check it scans as a single String token."""
    token = single_token(mathics3_code)
    # Compare for equality: the original ``assert token, Token(...)`` only
    # tested truthiness of ``token``, using the Token as the failure message.
    assert token == Token("String", mathics3_code, 0)


def incomplete_error(string: str):
    """Assert that tokenising ``string`` raises IncompleteSyntaxError."""
    with pytest.raises(IncompleteSyntaxError):
        get_mathics3_tokens(string)


def invalid_error(string: str):
    """Assert that tokenising ``string`` raises InvalidSyntaxError."""
    with pytest.raises(InvalidSyntaxError):
        get_mathics3_tokens(string)


def scan_error(string: str):
    """Assert that tokenising ``string`` raises ScanError."""
    with pytest.raises(ScanError):
        get_mathics3_tokens(string)


def single_token(mathics3_code: str) -> Token:
    """
    Tokenise ``mathics3_code`` and return its sole token.

    Asserts that the scan produced exactly one token.
    """
    toks = get_mathics3_tokens(mathics3_code)
    assert len(toks) == 1
    return toks[0]


def tags(mathics3_code: str) -> List[str]:
    """Return just the tag names of the tokens scanned from ``mathics3_code``."""
    return [token.tag for token in get_mathics3_tokens(mathics3_code)]


def mathics3_token_generator(tokenizer):
    """
    Yield successive tokens produced by ``tokenizer``.

    The terminating "END" token is yielded as the final item before the
    generator stops, so callers can observe it (e.g. to check the
    tokeniser's mode after each token).
    """
    while True:
        token = tokenizer.next()
        yield token
        if token.tag == "END":
            break


def get_mathics3_tokens(mathics3_code: str) -> List[Token]:
    """
    Return the sequence of tokens scanned from the string ``mathics3_code``,
    with the terminating "END" token stripped off.
    """
    tokenizer = Tokeniser(SingleLineFeeder(mathics3_code))
    mathics3_tokens = list(mathics3_token_generator(tokenizer))
    # The generator always yields the "END" token last; check that and
    # drop it so tests compare only the meaningful tokens.
    assert len(mathics3_tokens) > 0
    assert mathics3_tokens[-1].tag == "END"
    return mathics3_tokens[:-1]


def test_apply():
    """Postfix (//), prefix (@) and infix (~) application operators tokenise correctly."""
    assert get_mathics3_tokens("f // x") == [
        Token("Symbol", "f", 0),
        Token("Postfix", "//", 2),
        Token("Symbol", "x", 5),
    ]
    assert get_mathics3_tokens("f @ x") == [
        Token("Symbol", "f", 0),
        Token("Prefix", "@", 2),
        Token("Symbol", "x", 4),
    ]
    assert get_mathics3_tokens("f ~ x") == [
        Token("Symbol", "f", 0),
        Token("Infix", "~", 2),
        Token("Symbol", "x", 4),
    ]


def test_association():
assert tokens("<|x -> m|>") == [
assert get_mathics3_tokens("<|x -> m|>") == [
Token("RawLeftAssociation", "<|", 0),
Token("Symbol", "x", 2),
Token("Rule", "->", 4),
Expand All @@ -94,38 +108,56 @@ def test_association():


def test_backslash():
    """Named backslash character, raw backslash, and incomplete lone backslash."""
    assert get_mathics3_tokens("\\[Backslash]") == [Token("Backslash", "\u2216", 0)]

    assert get_mathics3_tokens("\\ a") == [
        Token("RawBackslash", "\\", 0),
        Token("Symbol", "a", 2),
    ]

    # A lone backslash at end of input is incomplete syntax.
    incomplete_error("\\")


def test_boxes():
    """Row-box delimiters switch the tokeniser mode between "expr" and "box_expr"."""
    tokenizer = Tokeniser(SingleLineFeeder("\\(1\\)"))
    assert tokenizer.mode == "expr"
    token_generator = mathics3_token_generator(tokenizer)
    # After each expected token, the tokeniser should be in the expected mode:
    # entering "\\(" switches to box_expr; leaving "\\)" switches back to expr.
    for expect_token, expect_mode in (
        (Token("LeftRowBox", "\\(", 0), "box_expr"),
        (Token("Number", "1", 2), "box_expr"),
        (Token("RightRowBox", "\\)", 3), "expr"),
    ):
        token = next(token_generator)
        assert token == expect_token
        assert tokenizer.mode == expect_mode


def test_information():
    """"??" scans as Information, while separated "? ?" scans as two PatternTests."""
    assert get_mathics3_tokens("??Sin") == [
        Token("Information", "??", 0),
        Token("Symbol", "Sin", 2),
    ]

    assert get_mathics3_tokens("? ?Sin") == [
        Token("PatternTest", "?", 0),
        Token("PatternTest", "?", 2),
        Token("Symbol", "Sin", 3),
    ]


def test_int_repeated():
    """Disambiguation of ".." after an integer vs. a real followed by Dot."""
    assert get_mathics3_tokens("1..") == [
        Token("Number", "1", 0),
        Token("Repeated", "..", 1),
    ]
    assert get_mathics3_tokens("1. .") == [
        Token("Number", "1.", 0),
        Token("Dot", ".", 3),
    ]


def test_integeral():
assert tokens("\u222B x \uf74c y") == [
assert get_mathics3_tokens("\u222B x \uf74c y") == [
Token("Integral", "\u222B", 0),
Token("Symbol", "x", 2),
Token("DifferentialD", "\uf74c", 4),
Expand Down Expand Up @@ -171,7 +203,7 @@ def test_number_real():


def test_pre():
assert tokens("++x++") == [
assert get_mathics3_tokens("++x++") == [
Token("Increment", "++", 0),
Token("Symbol", "x", 2),
Token("Increment", "++", 3),
Expand All @@ -193,12 +225,12 @@ def test_string():


def test_set():
assert tokens("x = y") == [
assert get_mathics3_tokens("x = y") == [
Token("Symbol", "x", 0),
Token("Set", "=", 2),
Token("Symbol", "y", 4),
]
assert tokens("x /: y = z") == [
assert get_mathics3_tokens("x /: y = z") == [
Token("Symbol", "x", 0),
Token("TagSet", "/:", 2),
Token("Symbol", "y", 5),
Expand All @@ -215,16 +247,25 @@ def test_symbol():


def test_unset():
    """Disambiguation of "=." (Unset) from Set followed by a number or Repeated."""
    assert get_mathics3_tokens("=.") == [Token("Unset", "=.", 0)]

    assert get_mathics3_tokens("= .") == [Token("Unset", "= .", 0)]
    assert get_mathics3_tokens("=.5") == [
        Token("Set", "=", 0),
        Token("Number", ".5", 1),
    ]
    assert get_mathics3_tokens("= ..") == [
        Token("Set", "=", 0),
        Token("Repeated", "..", 2),
    ]


def test_function():
    """Both ASCII "&" and the \\uf4a1 private-use character scan as Function."""
    assert get_mathics3_tokens("x&") == [
        Token("Symbol", "x", 0),
        Token("Function", "&", 1),
    ]
    assert get_mathics3_tokens("x\uf4a1") == [
        Token("Symbol", "x", 0),
        Token("Function", "\uf4a1", 1),
    ]

0 comments on commit e367ac5

Please sign in to comment.