diff --git a/mathics_scanner/feed.py b/mathics_scanner/feed.py index 8cc0d8f..f5b1c6e 100644 --- a/mathics_scanner/feed.py +++ b/mathics_scanner/feed.py @@ -74,7 +74,7 @@ def message(self, symbol_name: str, tag: str, *args) -> None: def syntax_message(self, symbol_name: str, tag: str, *args) -> list: """ - Append a syntax-message error message to the message queue. + Append a "Syntax" error message to the message queue. """ if len(args) > 3: diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py index 90a41f4..11638e4 100644 --- a/mathics_scanner/tokeniser.py +++ b/mathics_scanner/tokeniser.py @@ -414,19 +414,17 @@ def incomplete(self): self.prescanner.incomplete() self.code += self.prescanner.replace_escape_sequences() - def sntx_message(self, pos: Optional[int] = None, tag: Optional[str] = None): + def sntx_message(self, pos: Optional[int] = None): """ Send a "syntx{b,f} error message to the input-reading feeder. """ if pos is None: pos = self.pos pre, post = self.code[:pos], self.code[pos:].rstrip("\n") - if tag is None: - tag = "sntxb" if pos == 0 else "sntxf" if pos == 0: - self.feeder.message("Syntax", tag, post) + self.feeder.message("Syntax", "sntxb", post) else: - self.feeder.message("Syntax", tag, pre, post) + self.feeder.message("Syntax", "sntxf", pre, post) # TODO: Convert this to __next__ in the future. def next(self) -> Token: @@ -551,13 +549,13 @@ def t_String(self, match: re.Match) -> Token: "Break out from self.code the next token which is expected to be a String" start, end = self.pos, None self.pos += 1 # skip opening '"' - skipped_chars = [] + newlines = [] while True: if self.pos >= len(self.code): if end is None: # reached end while still inside string self.incomplete() - skipped_chars.append(self.pos) + newlines.append(self.pos) else: break char = self.code[self.pos] @@ -573,7 +571,7 @@ def t_String(self, match: re.Match) -> Token: if self.pos + 1 == len(self.code): # We have a \ at the end of a line. self.incomplete() - skipped_chars.append(self.pos) + newlines.append(self.pos) # Code below is in pre-scanner. We might decide # later to move that code here. @@ -586,7 +584,7 @@ def t_String(self, match: re.Match) -> Token: # "\\" have the backslash preserved. But for other # characters, the backslash is removed. if self.code[self.pos + 1] not in ( - "b", # bell? + "b", # word boundary? "f", # form-feed? "n", # newline "r", # carrage return @@ -604,7 +602,7 @@ def t_String(self, match: re.Match) -> Token: else: self.pos += 1 - indices = [start] + skipped_chars + [end] + indices = [start] + newlines + [end] result = "".join( self.code[indices[i] : indices[i + 1]] for i in range(len(indices) - 1) ) diff --git a/test/test_string_tokens.py b/test/test_string_tokens.py index 9e73a55..52e0524 100644 --- a/test/test_string_tokens.py +++ b/test/test_string_tokens.py @@ -64,4 +64,4 @@ def test_string(): check_string(r'"a\"b\\c"', r'"a\"b\\c"') incomplete_error(r'"abc', "String does not have terminating quote") incomplete_error(r'"\"', "Unterminated escape sequence") - incomplete_error(r'"a\X"', '"X" is not a valid escape character') + scan_error(r'"a\X"', '"X" is not a valid escape character')