forked from theopenconversationkit/tock
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
theopenconversationkit#1606 Add Deepl translator module
- Loading branch information
charles_moulhaud
committed
Jul 8, 2024
1 parent
c59da92
commit ed3d5d2
Showing
6 changed files
with
313 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- | ||
~ Copyright (C) 2017/2021 e-voyageurs technologies | ||
~ | ||
~ Licensed under the Apache License, Version 2.0 (the "License"); | ||
~ you may not use this file except in compliance with the License. | ||
~ You may obtain a copy of the License at | ||
~ | ||
~ http://www.apache.org/licenses/LICENSE-2.0 | ||
~ | ||
~ Unless required by applicable law or agreed to in writing, software | ||
~ distributed under the License is distributed on an "AS IS" BASIS, | ||
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
~ See the License for the specific language governing permissions and | ||
~ limitations under the License. | ||
--> | ||
|
||
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<parent> | ||
<groupId>ai.tock</groupId> | ||
<artifactId>tock-translator</artifactId> | ||
<version>24.3.4-SNAPSHOT</version> | ||
</parent> | ||
|
||
<artifactId>tock-deepl-translate</artifactId> | ||
<name>Tock Deepl Translator</name> | ||
<description>Deepl translator implementation</description> | ||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>org.apache.commons</groupId> | ||
<artifactId>commons-text</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>ai.tock</groupId> | ||
<artifactId>tock-translator-core</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.squareup.okhttp3</groupId> | ||
<artifactId>okhttp</artifactId> | ||
<version>4.12.0</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.squareup.moshi</groupId> | ||
<artifactId>moshi</artifactId> | ||
<version>1.12.0</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.squareup.moshi</groupId> | ||
<artifactId>moshi-kotlin</artifactId> | ||
<version>1.12.0</version> | ||
</dependency> | ||
</dependencies> | ||
|
||
</project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/* | ||
* Copyright (C) 2017/2021 e-voyageurs technologies | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package ai.tock.translator.deepl | ||
import okhttp3.MediaType.Companion.toMediaTypeOrNull | ||
import okhttp3.OkHttpClient | ||
import okhttp3.Request | ||
import okhttp3.RequestBody.Companion.toRequestBody | ||
import com.squareup.moshi.Moshi | ||
import com.squareup.moshi.kotlin.reflect.KotlinJsonAdapterFactory | ||
import java.io.IOException | ||
import java.util.regex.Pattern | ||
|
||
/**
 * JSON payload deserialized from the translation response.
 *
 * @property translations the translated entries carried by the response.
 */
data class TranslationResponse(val translations: List<Translation>)
|
||
/**
 * A single translated entry of a translation response.
 *
 * @property text the translated text.
 */
data class Translation(val text: String)
|
||
/** Value sent as the `tag_handling` form field of every translation request. */
const val TAG_HANDLING: String = "xml"
|
||
/**
 * Minimal HTTP client that posts a translation request to a DeepL endpoint.
 *
 * Placeholders of the form `{:name}` are masked before the text is sent and
 * restored, in order, in the translated text.
 *
 * @param apiURL URL the translation request is POSTed to.
 * @param apiKey key sent in the `Authorization: DeepL-Auth-Key ...` header.
 */
class DeeplClient(private val apiURL: String, private val apiKey: String) {
    private val client = OkHttpClient()
    private val moshi = Moshi.Builder().add(KotlinJsonAdapterFactory()).build()
    private val jsonAdapter = moshi.adapter(TranslationResponse::class.java)

    /**
     * Replaces every `{:name}` placeholder of [text] with [PLACEHOLDER_TOKEN].
     *
     * @return the masked text and the placeholder names, in order of appearance.
     */
    private fun replaceSpecificPlaceholders(text: String): Pair<String, List<String>> {
        val matcher = PLACEHOLDER_PATTERN.matcher(text)

        // Store original placeholders for later restoration
        val placeholders = mutableListOf<String>()
        while (matcher.find()) {
            placeholders.add(matcher.group(1))
        }

        // replaceAll resets the matcher, so the scan above does not affect it.
        return matcher.replaceAll(PLACEHOLDER_TOKEN) to placeholders
    }

    /**
     * Restores the [placeholders] in [text], one per [PLACEHOLDER_TOKEN] occurrence.
     *
     * The text is processed in a single pass so that a restored placeholder is never
     * matched again. Tokens left once [placeholders] is exhausted are kept unchanged.
     */
    private fun revertSpecificPlaceholders(text: String, placeholders: List<String>): String {
        val remaining = placeholders.iterator()
        return TOKEN_REGEX.replace(text) { match ->
            if (remaining.hasNext()) "{:${remaining.next()}}" else match.value
        }
    }

    /** URL-encodes [value] for use in an `application/x-www-form-urlencoded` body. */
    private fun encode(value: String): String = java.net.URLEncoder.encode(value, "UTF-8")

    /**
     * Translates [text] from [sourceLang] to [targetLang].
     *
     * @param preserveFormatting sent as the `preserve_formatting` form field.
     * @param glossaryId glossary to use; no glossary is sent when it is null, blank or `"default"`.
     * @return the first translation of the response with placeholders restored,
     *   or null when the response contains no translation.
     * @throws IOException when the HTTP call is unsuccessful or the response has no body.
     */
    fun translate(
        text: String,
        sourceLang: String,
        targetLang: String,
        preserveFormatting: Boolean,
        glossaryId: String?
    ): String? {
        val (maskedText, originalPlaceholders) = replaceSpecificPlaceholders(text)

        val formFields = mutableListOf(
            "text" to maskedText,
            "source_lang" to sourceLang,
            "target_lang" to targetLang,
            "preserve_formatting" to preserveFormatting.toString(),
            "tag_handling" to TAG_HANDLING
        )
        if (!glossaryId.isNullOrBlank() && glossaryId != NO_GLOSSARY) {
            formFields += "glossary_id" to glossaryId
        }

        // Every value is URL-encoded: the text may contain '&', '=', '+' or non-ASCII characters.
        val requestBody = formFields.joinToString("&") { (name, value) -> "${encode(name)}=${encode(value)}" }

        val request = Request.Builder()
            .url(apiURL)
            .addHeader("Authorization", "DeepL-Auth-Key $apiKey")
            .post(requestBody.toRequestBody("application/x-www-form-urlencoded".toMediaTypeOrNull()))
            .build()

        client.newCall(request).execute().use { response ->
            if (!response.isSuccessful) throw IOException("Unexpected code $response")

            val responseBody = response.body?.string()
                ?: throw IOException("Empty response body for $response")

            val translatedText = jsonAdapter.fromJson(responseBody)?.translations?.firstOrNull()?.text
            return translatedText?.let { revertSpecificPlaceholders(it, originalPlaceholders) }
        }
    }

    private companion object {
        /** Token substituted for each placeholder before the text is sent. */
        const val PLACEHOLDER_TOKEN = "_PLACEHOLDER_"

        /** Glossary id value meaning "no glossary configured". */
        const val NO_GLOSSARY = "default"

        /** Matches `{:name}` and captures `name`. */
        val PLACEHOLDER_PATTERN: Pattern = Pattern.compile("\\{:([^}]*)}")

        /** Matches [PLACEHOLDER_TOKEN] literally (it contains no regex metacharacter). */
        val TOKEN_REGEX = Regex(PLACEHOLDER_TOKEN)
    }
}
39 changes: 39 additions & 0 deletions
39
translator/deepl-translate/src/main/kotlin/DeeplTranslatorEngine.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/* | ||
* Copyright (C) 2017/2021 e-voyageurs technologies | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package ai.tock.translator.deepl | ||
|
||
import ai.tock.shared.property | ||
import ai.tock.translator.TranslatorEngine | ||
import org.apache.commons.text.StringEscapeUtils | ||
import java.util.Locale | ||
|
||
/**
 * [TranslatorEngine] implementation that delegates to [DeeplClient].
 *
 * The client URL, API key and glossary id are read once from the
 * `tock_translator_deepl_*` properties; each of them defaults to `"default"`.
 */
internal object DeeplTranslatorEngine : TranslatorEngine {

    private val deeplClient = DeeplClient(
        property("tock_translator_deepl_api_url", "default"),
        property("tock_translator_deepl_api_key", "default")
    )
    private val glossaryId = property("tock_translator_deepl_glossaryId", "default")
    override val supportAdminTranslation: Boolean = true

    /**
     * Translates [text] from [source] to [target].
     *
     * @return the translated text with HTML4 entities unescaped, or an empty string when
     *   [target] is not the configured target language or no translation is returned.
     */
    override fun translate(text: String, source: Locale, target: Locale): String {
        // Only the language configured in tock_translator_deepl_target_language (default "en") is translated.
        if (target.language != property("tock_translator_deepl_target_language", "en")) {
            return ""
        }
        val translatedText = deeplClient.translate(
            text,
            source.language,
            target.language,
            preserveFormatting = true,
            glossaryId = glossaryId
        )
        // The client may return null; fall back to the same empty result instead of failing.
        return translatedText?.let { StringEscapeUtils.unescapeHtml4(it) }.orEmpty()
    }
}
42 changes: 42 additions & 0 deletions
42
translator/deepl-translate/src/main/kotlin/DeeplTranslatorIoc.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
* Copyright (C) 2017/2021 e-voyageurs technologies | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package ai.tock.translator.deepl | ||
|
||
/* | ||
* Copyright (C) 2017/2021 e-voyageurs technologies | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
import ai.tock.translator.TranslatorEngine | ||
import com.github.salomonbrys.kodein.Kodein | ||
import com.github.salomonbrys.kodein.bind | ||
import com.github.salomonbrys.kodein.provider | ||
|
||
/**
 * Kodein module binding [TranslatorEngine] to [DeeplTranslatorEngine];
 * the binding is declared with `overrides = true`.
 */
val deeplTranslatorModule: Kodein.Module = Kodein.Module {
    bind<TranslatorEngine>(overrides = true).with(provider { DeeplTranslatorEngine })
}
76 changes: 76 additions & 0 deletions
76
translator/deepl-translate/src/test/kotlin/DeeplTranslateIntegrationTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import ai.tock.translator.deepl.DeeplTranslatorEngine | ||
import org.junit.jupiter.api.Disabled | ||
import org.junit.jupiter.api.Test | ||
import java.util.Locale | ||
import kotlin.test.assertEquals | ||
|
||
/* | ||
* Copyright (C) 2017/2021 e-voyageurs technologies | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
/**
 * Integration tests for [DeeplTranslatorEngine].
 *
 * All of them are disabled because they use the DeepL Pro API, which can be expensive.
 */
class DeeplTranslateIntegrationTest {
    @Test
    @Disabled
    fun simpleTest() {
        val input = "Bonjour, je voudrais me rendre à New-York Mardi prochain"
        val expected = "Hello, I would like to go to New York next Tuesday."

        val actual = DeeplTranslatorEngine.translate(input, Locale.FRENCH, Locale.ENGLISH)

        assertEquals(expected, actual)
    }

    @Test
    @Disabled
    fun testWithEmoticonAndAntislash() {
        // Input mixes an emoji, line breaks and escaped double quotes.
        val input = "Bonjour, je suis l'Agent virtuel SNCF Voyageurs! \uD83E\uDD16\n" +
            "Je vous informe sur l'état du trafic en temps réel.\n" +
            "Dites-moi par exemple \"Mon train 6111 est-il à l'heure ?\", \"Aller à Saint-Lazare\", \"Prochains départs Gare de Lyon\" ..."
        val expected = "Hello, I'm the SNCF Voyageurs Virtual Agent! \uD83E\uDD16\n" +
            "I inform you about traffic conditions in real time.\n" +
            "Tell me for example \"Is my train 6111 on time?\", \"Going to Saint-Lazare\", \"Next departures Gare de Lyon\" ..."

        val actual = DeeplTranslatorEngine.translate(input, Locale.FRENCH, Locale.ENGLISH)

        assertEquals(expected, actual)
    }

    @Test
    @Disabled
    fun testWithParameters() {
        // The {:city} and {:date} placeholders are expected unchanged in the result.
        val input = "Bonjour, je voudrais me rendre à {:city} {:date}"
        val expected = "Hallo, ich möchte nach {:city} {:date} reisen"

        val actual = DeeplTranslatorEngine.translate(input, Locale.FRENCH, Locale.GERMAN)

        assertEquals(expected, actual)
    }

    @Test
    @Disabled
    fun testWithHTML() {
        // The <br> tags are expected unchanged in the result.
        val input = "Bonjour, je voudrais me rendre à Paris <br><br/> demain soir"
        val expected = "Hallo, ich möchte morgen Abend nach Paris <br><br/> fahren"

        val actual = DeeplTranslatorEngine.translate(input, Locale.FRENCH, Locale.GERMAN)

        assertEquals(expected, actual)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters