diff --git a/app.py b/app.py index c6c5ac571b8..a8820380d72 100644 --- a/app.py +++ b/app.py @@ -2289,7 +2289,7 @@ def translate_keywords(): body = request.json try: translated_code = hedy_translation.translate_keywords(body.get('code'), body.get( - 'start_lang'), body.get('goal_lang'), level=int(body.get('level', 1))) + 'start_lang'), body.get('goal_lang'), level=int(body.get('level', 1)), translate_strings=True) if translated_code or translated_code == '': # empty string is False, so explicitly allow it session["previous_keyword_lang"] = body.get("start_lang") session["keyword_lang"] = body.get("goal_lang") diff --git a/hedy_translation.py b/hedy_translation.py index f446feb2be8..b38e4ea05e3 100644 --- a/hedy_translation.py +++ b/hedy_translation.py @@ -1,5 +1,5 @@ from collections import namedtuple -from lark import Token, Visitor +from lark import Token, Visitor, Tree from lark.exceptions import VisitError import hedy import operator @@ -7,11 +7,16 @@ import hedy_content from website.yaml_file import YamlFile import copy +from googletrans import Translator + # Holds the token that needs to be translated, its line number, start and # end indexes and its value (e.g. ", "). Rule = namedtuple("Rule", "keyword line start end value") +# stores the connection to Google Translate. NOTE(review): the googletrans name `Translator` is also the name of the `Translator(Visitor)` class in this module; consider an import alias +translator = Translator() + def keywords_to_dict(lang="nl"): """ "Return a dictionary of keywords from language of choice. 
Key is english value is lang of choice""" @@ -73,7 +78,12 @@ def get_target_keyword(keyword_dict, keyword): return keyword -def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1): +def translate_string(string, from_lang, to_lang): + result = translator.translate(string, src=from_lang, dest=to_lang) + return result.text + + +def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1, translate_strings=False): """ "Return code with keywords translated to language of choice in level of choice""" if input_string == "": @@ -96,7 +106,7 @@ def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1): program_root = parser.parse(processed_input + "\n").children[0] - translator = Translator(processed_input) + translator = Translator(processed_input, translate_strings) translator.visit(program_root) ordered_rules = reversed(sorted(translator.rules, key=operator.attrgetter("line", "start"))) @@ -112,8 +122,17 @@ def translate_keywords(input_string, from_lang="en", to_lang="nl", level=1): target = get_target_keyword(keyword_dict_to, rule.keyword) replaced_line = replace_token_in_line(line, rule, original, target) result = replace_line(lines, rule.line - 1, replaced_line) - - # For now the needed post processing is only removing the 'end-block's added during pre-processing + else: + if translate_strings: + # this is not a keyword, so (for now) that means a text string that needs to be translated + lines = result.splitlines() # do we need to do this for each rule?? 
+ line = lines[rule.line - 1] + original = rule.value + target = translate_string(original, from_lang, to_lang) + replaced_line = replace_token_in_line(line, rule, original, target) + result = replace_line(lines, rule.line - 1, replaced_line) + + # For now the needed post processing is only removing the 'end-block's added during pre-processing result = "\n".join([line for line in result.splitlines()]) result = result.replace("#ENDBLOCK", "") @@ -146,11 +165,15 @@ def replace_line(lines, index, line): def replace_token_in_line(line, rule, original, target): """Replaces a token in a line from the user input with its translated equivalent""" - before = "" if rule.start == 0 else line[0: rule.start] - after = "" if rule.end == len(line) - 1 else line[rule.end + 1:] - # Note that we need to replace the target value in the original value because some - # grammar rules have ambiguous length and value, e.g. _COMMA: _SPACES* - # (latin_comma | arabic_comma) _SPACES* + if rule.keyword == 'text': + before = line[:rule.start] + after = line[rule.end-1:] + else: + before = "" if rule.start == 0 else line[0: rule.start] + after = "" if rule.end == len(line) - 1 else line[rule.end + 1:] + # Note that we need to replace the target value in the original value because some + # grammar rules have ambiguous length and value, e.g. _COMMA: _SPACES* + # (latin_comma | arabic_comma) _SPACES* return before + rule.value.replace(original, target) + after @@ -194,9 +217,10 @@ class Translator(Visitor): in the user input string and original value. 
The information is later used to replace the token in the original user input with the translated token value.""" - def __init__(self, input_string): + def __init__(self, input_string, translate_strings=False): self.input_string = input_string self.rules = [] + self.translate_strings = translate_strings def define(self, tree): self.add_rule("_DEFINE", "define", tree) @@ -216,6 +240,15 @@ def returns(self, tree): def print(self, tree): self.add_rule("_PRINT", "print", tree) + if self.translate_strings: + # in addition to keywords, we are now also adding plain text strings + # like print arguments to the list of things that need to be translated + if len(tree.children) > 1: + # argument = str(tree.children[1].children[0]) + for argument in tree.children: + if type(argument) is Tree and argument.data == 'text': + self.add_rule("text", "text", argument) # this of course only supports 1 string + def print_empty_brackets(self, tree): self.print(tree) @@ -223,6 +256,15 @@ def ask(self, tree): self.add_rule("_IS", "is", tree) self.add_rule("_ASK", "ask", tree) + if self.translate_strings: # it'd be nicer of course if this was not copy-paste from PRINT! 
+ # in addition to keywords, we are now also adding plain text strings + # like ask arguments to the list of things that need to be translated + if len(tree.children) > 1: + # argument = str(tree.children[1].children[0]) + for argument in tree.children: + if type(argument) is Tree and argument.data == 'text': + self.add_rule("text", "text", argument) # this of course only supports 1 string + def echo(self, tree): self.add_rule("_ECHO", "echo", tree) @@ -354,17 +396,27 @@ def pressed(self, tree): self.add_rule("_PRESSED", "pressed", tree) def add_rule(self, token_name, token_keyword, tree): - token = self.get_keyword_token(token_name, tree) - if token: - rule = Rule( - token_keyword, token.line, token.column - 1, token.end_column - 2, token.value - ) - self.rules.append(rule) - - def get_keyword_token(self, token_type, node): - for c in node.children: + if token_name == "text": # this is not superduper pretty but for now it works! + token = tree.children[0] + if token: + rule = Rule( + token_name, token.line, token.column - 1, token.end_column, token.value + ) + self.rules.append(rule) + else: + token = self.get_keyword_token(token_name, tree) + if token: + rule = Rule( + token_keyword, token.line, token.column - 1, token.end_column - 2, token.value + ) + self.rules.append(rule) + + def get_keyword_token(self, token_type, tree): + for c in tree.children: if type(c) is Token and c.type == token_type: return c + if type(c) is Tree and c.data == token_type: + return c.children[0] return None def get_keyword_tokens(self, token_type, node): diff --git a/requirements.txt b/requirements.txt index 9f4a0de2ddd..041c97695fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,3 +35,4 @@ doit_watch>=0.1.0 uflash>=2.0.0 pyinstaller==6.3.0 commonmark==0.9.1 +googletrans==3.1.0a0 diff --git a/tests/test_translation_level/test_translation_level_01.py b/tests/test_translation_level/test_translation_level_01.py index 81b7adf0f00..ee1f0a4a86e 100644 --- 
a/tests/test_translation_level/test_translation_level_01.py +++ b/tests/test_translation_level/test_translation_level_01.py @@ -25,6 +25,15 @@ def test_print_english_dutch(self): self.assertEqual(expected, result) + def test_print_english_dutch_including_string(self): + code = 'print Hello welcome to Hedy!' + + result = hedy_translation.translate_keywords( + code, from_lang="en", to_lang="nl", level=self.level, translate_strings=True) + expected = 'print Hallo welkom bij Hedy!' + + self.assertEqual(expected, result) + def test_ask_english_dutch(self): code = "ask Hallo welkom bij Hedy!" @@ -33,6 +42,15 @@ def test_ask_english_dutch(self): self.assertEqual(expected, result) + def test_ask_english_dutch_including_string(self): + code = "ask Hello welcome to Hedy!!" + + result = hedy_translation.translate_keywords(code, from_lang="en", to_lang="nl", + level=self.level, translate_strings=True) + expected = "vraag Hallo welkom bij Hedy!!" + + self.assertEqual(expected, result) + def test_echo_english_dutch(self): code = "ask Hallo welkom bij Hedy!\necho" diff --git a/tests/test_translation_level/test_translation_level_02.py b/tests/test_translation_level/test_translation_level_02.py index 0747d08fdce..5b9f5e2a944 100644 --- a/tests/test_translation_level/test_translation_level_02.py +++ b/tests/test_translation_level/test_translation_level_02.py @@ -15,12 +15,12 @@ class TestsTranslationLevel2(HedyTester): level = 2 all_keywords = hedy_translation.all_keywords_to_dict() - def test_print(self): - code = "print Hallo welkom bij Hedy!" + def test_print_including_string(self): + code = "print Hallo, welkom Hedy!" result = hedy_translation.translate_keywords( - code, "nl", "en", self.level) - expected = "print Hallo welkom bij Hedy!" + code, "nl", "en", self.level, translate_strings=True) + expected = "print Hello, welcome Hedy!" 
self.assertEqual(expected, result) @@ -69,6 +69,15 @@ def test_print_var_text(self): self.assertEqual(expected, result) + # def test_print_var_text_including_string(self): + # code = "welcome is Hi welcome to Hedy\nprint welcome Enjoy!" + # + # result = hedy_translation.translate_keywords( + # code, "en", "nl", self.level, translate_strings=True) + # expected = "welkom is Hallo welkom bij Hedy\nprint welkom Veel plezier" + + self.assertEqual(expected, result) + def test_ask_kewords(self): code = "hedy is vraag print ask echo" @@ -96,6 +105,27 @@ def test_ask_assign_dutch_english(self): self.assertEqual(expected, result) + def test_ask_assign_dutch_english_including_string(self): + code = textwrap.dedent("""\ + naam is vraag Hoe heet jij? + print Dus het is naam""") + + result = hedy_translation.translate_keywords( + code, "nl", "en", self.level, translate_strings=True) + + expected = textwrap.dedent("""\ + naam is ask How is called you? + print So It is name""") + + # the result sounds silly because all words are translated separately + # in levels 2 and 3, this is needed because words in between can be vars + # in level 4 it will magically be better + + # we should, of course, change this, but that's not easy, and for now it is better than what we have + # also: result is naam because ask vars are not yet translated! + + self.assertEqual(expected, result) + def test_translate_back(self): code = "print welkom bij Hedy\nnaam is ask what is your name\nprint naam"