From 703b8d8b27d10850673f07cc7f15c740d4612f0b Mon Sep 17 00:00:00 2001 From: boryanagoncharenko <3010723+boryanagoncharenko@users.noreply.github.com> Date: Sat, 20 Apr 2024 00:27:26 +0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=AA=B2=20Exclude=20end=20blocks=20from=20?= =?UTF-8?q?comments=20(#5409)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #5395 In a specific case, an end block (`#ENDBLOCK`) is parsed as a comment. This PR excludes end blocks from the definition of comments. Note that comments are ignored in the parse tree, while end blocks are not. To ensure we will not leak an end block in a parse error message, I altered the textwithoutspaces to include a potential end block. How to test - Ensure that the following program does not produce an error in level 9 ``` repeat 3 times print 3 repeat 5 times print 5 print 1 ``` - Ensure that the following program does not show an error message which leaks a character from the endblock. It should mention that the unexpected character is a new line and not `#`: ``` for i in range 1 to 10 print i ``` --- grammars/level5-Additions.lark | 2 +- grammars/terminals.lark | 10 +++++----- tests/test_level/test_level_09.py | 24 ++++++++++++++++++++++++ tests/test_level/test_level_12.py | 24 ++++++++++++++++++++++++ tests/test_level/test_level_18.py | 26 ++++++++++++++++++++++++++ 5 files changed, 80 insertions(+), 6 deletions(-) diff --git a/grammars/level5-Additions.lark b/grammars/level5-Additions.lark index 0eb0d273f73..98a7e0cbfe1 100644 --- a/grammars/level5-Additions.lark +++ b/grammars/level5-Additions.lark @@ -45,7 +45,7 @@ nospace: /[^\n, ]/ textwithspaces: /(?:[^#\n،,,、 ]| (?!else|başka|faese|अन्यथा|否则|muuten|muidu|senão|ellers|alie|altfel|иначе|inače|altrimenti|інакше|lainnya|arall|annars|değilse|anders|ndryshe|inaczej|sinon|ไม่อย่างนั้น|sonst|లేకపోతే|sino|ورنہ|وکھرا|אחרת|وإلا))+/ -> text //anything can be parsed except for a newline and a comma for list separators //a space 
is allowed, but it may not be followed by an else. The part " (?!else))" means space not followed by (negative look ahead) else //That is because allowing else in strings leads to issue #303 -textwithoutspaces: /(?:[^#\n،,,、 *+\-\/eidіиlలేไamfnsbअ否אو]|א(?!חרת )|و(?!إلا |کھرا |رنہ)|否(?!则 )|अ(?!न्यथा )|и(?!наче )|m(?!uidu |uuten )|d(?!eğilse )|b(?!aşka )|n(?!dryshe )|ไ(?!ม่อย่างนั้น )|l(?!ainnya )|f(?!aese )|e(?!lse |llers )|s(?!inon |enão |ino |onst )|і(?!накше )|i(?!naczej |nače )|లే (?!కపోతే )|a(?!nders |lie |ltfel |ltrimenti |nnars |rall ))+/ -> text //anything can be parsed except for spaces (plus: a newline and a comma for list separators) +textwithoutspaces: /(?:[^#\n،,,、 *+\-\/eidіиlలేไamfnsbअ否אو]|א(?!חרת )|و(?!إلا |کھرا |رنہ)|否(?!则 )|अ(?!न्यथा )|и(?!наче )|m(?!uidu |uuten )|d(?!eğilse )|b(?!aşka )|n(?!dryshe )|ไ(?!ม่อย่างนั้น )|l(?!ainnya )|f(?!aese )|e(?!lse |llers )|s(?!inon |enão |ino |onst )|і(?!накше )|i(?!naczej |nače )|లే (?!కపోతే )|a(?!nders |lie |ltfel |ltrimenti |nnars |rall ))+/ _END_BLOCK* -> text //anything can be parsed except for spaces (plus: a newline and a comma for list separators) //the part e(?!lse)|i(?!f)) means e not followed by lse, and i not followed by f // this is because allowing else and if in invalid leads to ambiguity in the grammar // note that the i's look similar but are not: inaczej versus інакше! diff --git a/grammars/terminals.lark b/grammars/terminals.lark index a10bb7f1ab0..e5e8db00027 100644 --- a/grammars/terminals.lark +++ b/grammars/terminals.lark @@ -1,6 +1,10 @@ -COMMENT: _HASH /([^\n]*)/ +// A comment has a negative lookahead to ensure it cannot parse _END_BLOCKS +COMMENT: /#(?!ENDBLOCK)[^\n]*/ %ignore COMMENT +// Internal symbol added by the preprocess_blocks function to indicate the end of blocks +_END_BLOCK: "#ENDBLOCK" + NEGATIVE_NUMBER: _MINUS /\p{Nd}+/ ("." /\p{Nd}+/)? POSITIVE_NUMBER: /\p{Nd}+/ ("." /\p{Nd}+/)? 
NUMBER: NEGATIVE_NUMBER | POSITIVE_NUMBER @@ -14,10 +18,6 @@ NAME: LETTER_OR_UNDERSCORE LETTER_OR_NUMERAL* LETTER_OR_UNDERSCORE: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/ LETTER_OR_NUMERAL: LETTER_OR_UNDERSCORE | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/ -// Internal symbol added by the preprocess_blocks function to indicate the end of blocks -_END_BLOCK: "#ENDBLOCK" - - //symbols. they start with an underscore so they don't appear in the parse tree (Lark convention) _EOL: "\r"?"\n"+ _SPACE: " "+ diff --git a/tests/test_level/test_level_09.py b/tests/test_level/test_level_09.py index 16e4847bb1a..2dbcf498540 100644 --- a/tests/test_level/test_level_09.py +++ b/tests/test_level/test_level_09.py @@ -165,6 +165,30 @@ def test_repeat_nested_in_repeat(self): self.multi_level_tester(code=code, expected=expected, max_level=11) + def test_repeat_nested_multi_commands(self): + code = textwrap.dedent("""\ + repeat 3 times + print 3 + repeat 5 times + print 5 + print 1""") + + expected = textwrap.dedent(f"""\ + for __i in range({self.int_cast_transpiled(3)}): + print(f'3') + for __i in range({self.int_cast_transpiled(5)}): + print(f'5') + time.sleep(0.1) + print(f'1') + time.sleep(0.1)""") + + self.multi_level_tester( + code=code, + expected=expected, + max_level=11, + skip_faulty=False + ) + def test_repeat_no_indentation(self): code = textwrap.dedent("""\ repeat 3 times diff --git a/tests/test_level/test_level_12.py b/tests/test_level/test_level_12.py index 52eff652f7e..e88752f038f 100644 --- a/tests/test_level/test_level_12.py +++ b/tests/test_level/test_level_12.py @@ -1735,6 +1735,30 @@ def test_repeat_nested_in_repeat(self): self.multi_level_tester(code=code, expected=expected, max_level=17) + def test_repeat_nested_multi_commands(self): + code = textwrap.dedent("""\ + repeat 3 times + print 3 + repeat 5 times + print 5 + print 1""") + + expected = textwrap.dedent(f"""\ + for __i in range({self.int_cast_transpiled(3)}): + print(f'''3''') + for __i in 
range({self.int_cast_transpiled(5)}): + print(f'''5''') + time.sleep(0.1) + print(f'''1''') + time.sleep(0.1)""") + + self.multi_level_tester( + code=code, + expected=expected, + max_level=17, + skip_faulty=False + ) + # # for list command # diff --git a/tests/test_level/test_level_18.py b/tests/test_level/test_level_18.py index 82431bf40c9..5e68d1ce117 100644 --- a/tests/test_level/test_level_18.py +++ b/tests/test_level/test_level_18.py @@ -373,3 +373,29 @@ def test_if_list_access_lhs_and_or(self, op): code=code, expected=expected, ) + + # + # repeat + # + def test_repeat_nested_multi_commands(self): + code = textwrap.dedent("""\ + repeat 3 times + print(3) + repeat 5 times + print(5) + print(1)""") + + expected = textwrap.dedent(f"""\ + for __i in range({self.int_cast_transpiled(3)}): + print(f'''3''') + for __i in range({self.int_cast_transpiled(5)}): + print(f'''5''') + time.sleep(0.1) + print(f'''1''') + time.sleep(0.1)""") + + self.multi_level_tester( + code=code, + expected=expected, + skip_faulty=False + )