From ed3d5d244f7bf393a7f7ddd440a83ebe4d0bd90b Mon Sep 17 00:00:00 2001 From: charles_moulhaud Date: Mon, 8 Jul 2024 13:09:53 +0200 Subject: [PATCH] #1606 Add Deepl translator module --- translator/deepl-translate/pom.xml | 58 +++++++++++ .../src/main/kotlin/DeeplClient.kt | 97 +++++++++++++++++++ .../src/main/kotlin/DeeplTranslatorEngine.kt | 39 ++++++++ .../src/main/kotlin/DeeplTranslatorIoc.kt | 42 ++++++++ .../kotlin/DeeplTranslateIntegrationTest.kt | 76 +++++++++++++++ translator/pom.xml | 1 + 6 files changed, 313 insertions(+) create mode 100644 translator/deepl-translate/pom.xml create mode 100644 translator/deepl-translate/src/main/kotlin/DeeplClient.kt create mode 100644 translator/deepl-translate/src/main/kotlin/DeeplTranslatorEngine.kt create mode 100644 translator/deepl-translate/src/main/kotlin/DeeplTranslatorIoc.kt create mode 100644 translator/deepl-translate/src/test/kotlin/DeeplTranslateIntegrationTest.kt diff --git a/translator/deepl-translate/pom.xml b/translator/deepl-translate/pom.xml new file mode 100644 index 0000000000..2701a6608a --- /dev/null +++ b/translator/deepl-translate/pom.xml @@ -0,0 +1,58 @@ + + + + + 4.0.0 + + ai.tock + tock-translator + 24.3.4-SNAPSHOT + + + tock-deepl-translate + Tock Deepl Translator + Deepl translator implementation + + + + org.apache.commons + commons-text + + + ai.tock + tock-translator-core + + + com.squareup.okhttp3 + okhttp + 4.12.0 + + + com.squareup.moshi + moshi + 1.12.0 + + + com.squareup.moshi + moshi-kotlin + 1.12.0 + + + + \ No newline at end of file diff --git a/translator/deepl-translate/src/main/kotlin/DeeplClient.kt b/translator/deepl-translate/src/main/kotlin/DeeplClient.kt new file mode 100644 index 0000000000..fbeb4909e3 --- /dev/null +++ b/translator/deepl-translate/src/main/kotlin/DeeplClient.kt @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2017/2021 e-voyageurs technologies + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance 
with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ai.tock.translator.deepl
+
+import com.squareup.moshi.Moshi
+import com.squareup.moshi.kotlin.reflect.KotlinJsonAdapterFactory
+import okhttp3.MediaType.Companion.toMediaTypeOrNull
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.RequestBody.Companion.toRequestBody
+import java.io.IOException
+import java.net.URLEncoder
+import java.util.regex.Pattern
+
+/** JSON payload parsed from the translation response: the list of translated texts. */
+data class TranslationResponse(
+    val translations: List<Translation>
+)
+
+/** A single translated text inside a [TranslationResponse]. */
+data class Translation(
+    val text: String
+)
+
+const val TAG_HANDLING = "xml"
+
+/**
+ * Small HTTP client that posts a form-encoded translation request to a Deepl endpoint.
+ *
+ * @param apiURL URL the translation request is POSTed to
+ * @param apiKey key sent in the `Authorization: DeepL-Auth-Key ...` header
+ */
+class DeeplClient(private val apiURL: String, private val apiKey: String) {
+    private val client = OkHttpClient()
+    private val moshi = Moshi.Builder().add(KotlinJsonAdapterFactory()).build()
+    private val jsonAdapter = moshi.adapter(TranslationResponse::class.java)
+
+    // Matches placeholders of the form {:name}; group 1 is the name. Compiled once and reused.
+    private val placeholderPattern: Pattern = Pattern.compile("\\{:([^}]*)}")
+
+    /**
+     * Replaces every `{:name}` placeholder with a fixed token so that the names are not translated.
+     *
+     * @return the text with tokens, and the placeholder names in order of appearance
+     */
+    private fun replaceSpecificPlaceholders(text: String): Pair<String, List<String>> {
+        val matcher = placeholderPattern.matcher(text)
+
+        // Store original placeholders for later restoration
+        val placeholders = mutableListOf<String>()
+        while (matcher.find()) {
+            placeholders.add(matcher.group(1))
+        }
+
+        // replaceAll resets the matcher first, so the scan above does not affect the replacement.
+        return Pair(matcher.replaceAll(PLACEHOLDER_TOKEN), placeholders)
+    }
+
+    /**
+     * Puts the original `{:name}` placeholders back, one token at a time, in their original order.
+     * NOTE(review): restoration is positional, so it assumes the translation keeps the tokens in the same order.
+     */
+    private fun revertSpecificPlaceholders(text: String, placeholders: List<String>): String =
+        placeholders.fold(text) { current, placeholder ->
+            current.replaceFirst(PLACEHOLDER_TOKEN, "{:$placeholder}")
+        }
+
+    /** Builds one `name=value` pair of an `application/x-www-form-urlencoded` body, URL-encoding the value. */
+    private fun formField(name: String, value: String): String =
+        "$name=${URLEncoder.encode(value, "UTF-8")}"
+
+    /**
+     * Translates [text] from [sourceLang] to [targetLang].
+     *
+     * @param preserveFormatting forwarded as the `preserve_formatting` form field
+     * @param glossaryId glossary to use; `null`, blank or `"default"` means no glossary is sent
+     * @return the first translation with placeholders restored, or `null` when the response holds no translation
+     * @throws IOException when the HTTP call is not successful or the response has no body
+     */
+    fun translate(
+        text: String,
+        sourceLang: String,
+        targetLang: String,
+        preserveFormatting: Boolean,
+        glossaryId: String?
+    ): String? {
+        val (textWithPlaceholders, originalPlaceholders) = replaceSpecificPlaceholders(text)
+
+        // Every value is URL-encoded: raw '&', '=', '+' or '%' in the text would otherwise corrupt the form body.
+        val requestBody = listOfNotNull(
+            formField("text", textWithPlaceholders),
+            formField("source_lang", sourceLang),
+            formField("target_lang", targetLang),
+            formField("preserve_formatting", preserveFormatting.toString()),
+            formField("tag_handling", TAG_HANDLING),
+            // The Deepl API names this field "glossary_id"; it is omitted entirely when no glossary is configured.
+            glossaryId
+                ?.takeIf { it.isNotBlank() && it != NO_GLOSSARY }
+                ?.let { formField("glossary_id", it) }
+        ).joinToString("&")
+
+        val request = Request.Builder()
+            .url(apiURL)
+            .addHeader("Authorization", "DeepL-Auth-Key $apiKey")
+            .post(requestBody.toRequestBody("application/x-www-form-urlencoded".toMediaTypeOrNull()))
+            .build()
+
+        client.newCall(request).execute().use { response ->
+            if (!response.isSuccessful) throw IOException("Unexpected code $response")
+
+            val responseBody = response.body?.string()
+                ?: throw IOException("Empty response body from $apiURL")
+            val translatedText = jsonAdapter.fromJson(responseBody)?.translations?.firstOrNull()?.text
+
+            return translatedText?.let { revertSpecificPlaceholders(it, originalPlaceholders) }
+        }
+    }
+
+    private companion object {
+        /** Token substituted for each `{:name}` placeholder while the text is being translated. */
+        const val PLACEHOLDER_TOKEN = "_PLACEHOLDER_"
+
+        /** Glossary id value meaning "no glossary configured". */
+        const val NO_GLOSSARY = "default"
+    }
+}
\ No newline at end of file
diff --git a/translator/deepl-translate/src/main/kotlin/DeeplTranslatorEngine.kt b/translator/deepl-translate/src/main/kotlin/DeeplTranslatorEngine.kt
new file mode 100644
index 0000000000..debbdf9246
--- /dev/null
+++ b/translator/deepl-translate/src/main/kotlin/DeeplTranslatorEngine.kt
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2017/2021 e-voyageurs technologies
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ai.tock.translator.deepl
+
+import ai.tock.shared.property
+import ai.tock.translator.TranslatorEngine
+import org.apache.commons.text.StringEscapeUtils
+import java.util.Locale
+
+/**
+ * [TranslatorEngine] that delegates to [DeeplClient].
+ *
+ * Properties read here:
+ * - `tock_translator_deepl_api_url` and `tock_translator_deepl_api_key`: passed to the client
+ * - `tock_translator_deepl_glossaryId`: glossary id passed on each call (`"default"` when unset)
+ * - `tock_translator_deepl_target_language`: the only target language that is translated (`"en"` when unset)
+ */
+internal object DeeplTranslatorEngine : TranslatorEngine {
+
+    private val deeplClient = DeeplClient(
+        property("tock_translator_deepl_api_url", "default"),
+        property("tock_translator_deepl_api_key", "default")
+    )
+    private val glossaryId = property("tock_translator_deepl_glossaryId", "default")
+    override val supportAdminTranslation: Boolean = true
+
+    /**
+     * Translates [text] from [source] to [target] and unescapes HTML 4 entities in the result.
+     *
+     * @return the translated text, or an empty string when [target] is not the configured target language
+     *   or when the client returns no translation
+     */
+    override fun translate(text: String, source: Locale, target: Locale): String {
+        // Allows to filter translation on a specific language
+        if (target.language != property("tock_translator_deepl_target_language", "en")) {
+            return ""
+        }
+
+        val translatedText = deeplClient.translate(
+            text,
+            source.language,
+            target.language,
+            preserveFormatting = true,
+            glossaryId = glossaryId
+        )
+
+        // The client may return null; unescapeHtml4(null) would hand a null back into a non-null String.
+        return translatedText?.let { StringEscapeUtils.unescapeHtml4(it) }.orEmpty()
+    }
+}
diff --git a/translator/deepl-translate/src/main/kotlin/DeeplTranslatorIoc.kt b/translator/deepl-translate/src/main/kotlin/DeeplTranslatorIoc.kt
new file mode 100644
index 0000000000..c2954fc393
--- /dev/null
+++ b/translator/deepl-translate/src/main/kotlin/DeeplTranslatorIoc.kt
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2017/2021 e-voyageurs technologies
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ai.tock.translator.deepl
+
+import ai.tock.translator.TranslatorEngine
+import com.github.salomonbrys.kodein.Kodein
+import com.github.salomonbrys.kodein.bind
+import com.github.salomonbrys.kodein.provider
+
+/**
+ * Kodein module that binds [TranslatorEngine] to [DeeplTranslatorEngine].
+ * The binding is declared with `overrides = true`, so it replaces an existing [TranslatorEngine] binding.
+ */
+val deeplTranslatorModule = Kodein.Module {
+    // The explicit type argument is required: bind<T>() is reified and cannot infer T here.
+    bind<TranslatorEngine>(overrides = true) with provider { DeeplTranslatorEngine }
+}
diff --git a/translator/deepl-translate/src/test/kotlin/DeeplTranslateIntegrationTest.kt b/translator/deepl-translate/src/test/kotlin/DeeplTranslateIntegrationTest.kt
new file mode 100644
index 0000000000..7c90640ab1
--- /dev/null
+++ b/translator/deepl-translate/src/test/kotlin/DeeplTranslateIntegrationTest.kt
@@ -0,0 +1,76 @@
+import ai.tock.translator.deepl.DeeplTranslatorEngine
+import org.junit.jupiter.api.Disabled
+import org.junit.jupiter.api.Test
+import java.util.Locale
+import kotlin.test.assertEquals
+
+/*
+ * Copyright (C) 2017/2021 e-voyageurs technologies
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Integration tests that call DeeplTranslatorEngine.translate and compare the result with an expected text. + * + * All these tests are disabled because they use the Deepl pro API, which can be expensive. + */ +class DeeplTranslateIntegrationTest { + /** Plain French sentence translated to English. */ + @Test + @Disabled + fun simpleTest() { + val result = DeeplTranslatorEngine.translate( + "Bonjour, je voudrais me rendre à New-York Mardi prochain", + Locale.FRENCH, + Locale.ENGLISH + ) + assertEquals("Hello, I would like to go to New York next Tuesday.", result) + } + + /** Multi-line French text with an emoji (surrogate pair), escaped quotes and \n line breaks, translated to English. */ + @Test + @Disabled + fun testWithEmoticonAndAntislash() { + val result = DeeplTranslatorEngine.translate("Bonjour, je suis l'Agent virtuel SNCF Voyageurs! \uD83E\uDD16\n" + + "Je vous informe sur l'état du trafic en temps réel.\n" + + "Dites-moi par exemple \"Mon train 6111 est-il à l'heure ?\", \"Aller à Saint-Lazare\", \"Prochains départs Gare de Lyon\" ...", + Locale.FRENCH, + Locale.ENGLISH + ) + + assertEquals("Hello, I'm the SNCF Voyageurs Virtual Agent! \uD83E\uDD16\n" + + "I inform you about traffic conditions in real time.\n" + + "Tell me for example \"Is my train 6111 on time?\", \"Going to Saint-Lazare\", \"Next departures Gare de Lyon\" ...", + result + ) + } + + /** French to German; the expected text still contains the {:city} and {:date} placeholders unchanged. */ + @Test + @Disabled + fun testWithParameters() { + val result = DeeplTranslatorEngine.translate( + "Bonjour, je voudrais me rendre à {:city} {:date}", + Locale.FRENCH, + Locale.GERMAN + ) + assertEquals("Hallo, ich möchte nach {:city} {:date} reisen", result) + } + + /** French to German. NOTE(review): the literals below look truncated around "Paris"; the test name suggests HTML markup stood there - confirm against the original file. */ + @Test + @Disabled + fun testWithHTML() { + val result = DeeplTranslatorEngine.translate( + "Bonjour, je voudrais me rendre à Paris

demain soir", + Locale.FRENCH, + Locale.GERMAN + ) + assertEquals("Hallo, ich möchte morgen Abend nach Paris

fahren", result) + } +} \ No newline at end of file diff --git a/translator/pom.xml b/translator/pom.xml index bc18f8098e..d60f460337 100644 --- a/translator/pom.xml +++ b/translator/pom.xml @@ -33,6 +33,7 @@ core noop google-translate + deepl-translate