diff --git a/tests/tivars.py b/tests/tivars.py
index 0b7ca65..34eb6c7 100644
--- a/tests/tivars.py
+++ b/tests/tivars.py
@@ -225,6 +225,9 @@ def test_newlines(self):
         lines = "For(A,1,10", "Disp A", "End"
         self.assertEqual(TIProgram("\n".join(lines)), TIProgram("\r\n".join(lines)))
 
+    def test_byte_literals(self):
+        self.assertEqual(TIProgram.encode(r"\x26\uAA0AXYZ\0"), b'\x26\xAA\x0AXYZ\xbb\xd70')
+
 
 class NumericTests(unittest.TestCase):
     def real_float_test(self, real_type, filename, name, sign, exponent, mantissa, string, dec):
diff --git a/tivars/tokenizer/decoder.py b/tivars/tokenizer/decoder.py
index addd996..098da18 100644
--- a/tivars/tokenizer/decoder.py
+++ b/tivars/tokenizer/decoder.py
@@ -3,6 +3,9 @@
 """
 
+from warnings import warn
+
+from tivars.data import Bytes
 from tivars.models import *
 from tivars.tokens.scripts import *
 
 
@@ -44,10 +47,18 @@ def decode(bytestream: bytes, *,
 
         elif len(curr_bytes) >= 2:
             if not any(key.startswith(curr_bytes[:1]) for key in tokens.bytes):
-                raise ValueError(f"unrecognized byte '0x{curr_bytes[0]:x}' at position {index}")
+                warn(f"Unrecognized byte '0x{curr_bytes[0]:02x}' at position {index}.",
+                     BytesWarning)
+
+                out.append(b'?' if mode == "ti_ascii" else rf"\x{curr_bytes[0]:02x}")
 
             else:
-                raise ValueError(f"unrecognized bytes '0x{curr_bytes[0]:x}{curr_bytes[1]:x}' at position {index}")
+                warn(f"Unrecognized bytes '0x{curr_bytes[0]:02x}{curr_bytes[1]:02x}' at position {index}.",
+                     BytesWarning)
+
+                out.append(b'?' if mode == "ti_ascii" else rf"\u{curr_bytes[0]:02x}{curr_bytes[1]:02x}")
+
+            curr_bytes = b''
 
         elif any(curr_bytes):
             raise ValueError(f"unexpected null byte at position {index}")
diff --git a/tivars/tokenizer/state.py b/tivars/tokenizer/state.py
index 5025229..9b0f3f5 100644
--- a/tivars/tokenizer/state.py
+++ b/tivars/tokenizer/state.py
@@ -37,6 +37,16 @@ def munch(self, string: str, trie: TokenTrie) -> tuple[Token, str, list['Encoder
         :return: A tuple of the output `Token`, the remainder of ``string``, and a list of states to add to the stack
         """
 
+        # Is this a byte literal? (\xHH is one raw byte, \uHHHH is two)
+        if string.startswith((r"\x", r"\u")):
+            length = 4 if string.startswith(r"\x") else 6
+            literal, remainder = string[:length], string[length:]
+            token = Token(bytes.fromhex(literal[2:]),
+                          {"en": Translation(b'?', literal, literal, [])},
+                          {"illegal": "true"})
+
+            return token, remainder, self.next(token)
+
         tokens = trie.get_tokens(string)
         if not tokens:
             raise ValueError("no tokenization options exist")
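
For context, a minimal standalone sketch of the byte-literal grammar this diff introduces: \xHH encodes one raw byte in exactly 4 characters, \uHHHH encodes two bytes in exactly 6. The helper name parse_byte_literal is hypothetical (it is not part of tivars); it mirrors the new branch in munch() without depending on the library, and shows why the decoder must pad with {:02x} rather than {:x}.

def parse_byte_literal(string: str) -> tuple[bytes, str]:
    """Split a leading \\x or \\u escape off of ``string``."""
    if not string.startswith((r"\x", r"\u")):
        raise ValueError("not a byte literal")

    # Fixed widths matter here: a decoder emitting \xa for 0x0A (bare
    # {:x}) would be too short to consume, so {:02x} padding is required.
    length = 4 if string.startswith(r"\x") else 6
    literal, remainder = string[:length], string[length:]
    return bytes.fromhex(literal[2:]), remainder


assert parse_byte_literal(r"\x26XYZ") == (b'\x26', "XYZ")
assert parse_byte_literal(r"\uAA0AXYZ") == (b'\xaa\x0a', "XYZ")

With the decoder now emitting these fixed-width escapes under a BytesWarning instead of raising ValueError, unrecognized bytes survive a decode-then-encode round trip rather than aborting it.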