From e47c4398913a1d595f0fdc85f2b1351d844d3590 Mon Sep 17 00:00:00 2001 From: yonghanJu Date: Tue, 14 Nov 2023 22:22:39 +0900 Subject: [PATCH 1/3] feat: add AutoTextCorrector --- autotextcorrection/build.gradle.kts | 13 ++++ .../autotextcorrection/AutoTextCorrector.kt | 78 +++++++++++++++++++ .../AutoTextCorrectorUnitTest.kt | 47 +++++++++++ textautoconvert/build.gradle.kts | 9 +++ 4 files changed, 147 insertions(+) create mode 100644 autotextcorrection/build.gradle.kts create mode 100644 autotextcorrection/src/main/java/com/konkuk/autotextcorrection/AutoTextCorrector.kt create mode 100644 autotextcorrection/src/test/java/com/konkuk/autotextcorrection/AutoTextCorrectorUnitTest.kt create mode 100644 textautoconvert/build.gradle.kts diff --git a/autotextcorrection/build.gradle.kts b/autotextcorrection/build.gradle.kts new file mode 100644 index 0000000..824d24e --- /dev/null +++ b/autotextcorrection/build.gradle.kts @@ -0,0 +1,13 @@ +plugins { + id("java-library") + id("org.jetbrains.kotlin.jvm") +} + +java { + sourceCompatibility = JavaVersion.VERSION_1_7 + targetCompatibility = JavaVersion.VERSION_1_7 +} + +dependencies { + testImplementation(UnitTest.JUNIT) +} diff --git a/autotextcorrection/src/main/java/com/konkuk/autotextcorrection/AutoTextCorrector.kt b/autotextcorrection/src/main/java/com/konkuk/autotextcorrection/AutoTextCorrector.kt new file mode 100644 index 0000000..fe0f5ce --- /dev/null +++ b/autotextcorrection/src/main/java/com/konkuk/autotextcorrection/AutoTextCorrector.kt @@ -0,0 +1,78 @@ +package com.konkuk.autotextcorrection + +class AutoTextCorrector( + targetTextList: List<String>, + val errorCorrectionDistance: Int = 1, +) { + + private val targetWordsMap = mutableMapOf<String, String>() + private val targetTextDestructedList = targetTextList.map { word -> + val destructedWord = destructWord(word) + targetWordsMap[destructedWord] = word + destructedWord + } + + fun destructWord(word: String): String { + val result = StringBuilder() + for (element in word) { + var uniVal = 
element + + if (uniVal.code in 0xAC00..0xD7A3) { // only precomposed Hangul syllables decompose; anything else passes through unchanged + uniVal = (uniVal.code - 0xAC00).toChar() + val cho = (uniVal.code / 28 / 21).toChar() + val joong = (uniVal.code / 28 % 21).toChar() + val jong = (uniVal.code % 28).toChar() + result.append(INITIAL_CHARACTER[cho.code] + MID_CHARACTER[joong.code] + FINAL_CHARACTER[jong.code]) + } else { + result.append(element) + } + } + return result.toString() + } + + fun correctWord(word: String): String { + if (word.isEmpty()) return word + val destructedWord = destructWord(word) + for (destructedChars in targetTextDestructedList) { + val dp = List(destructedChars.length + 1) { IntArray(destructedWord.length + 1) } + for (i in 1..destructedChars.length) { + for (j in 1..destructedWord.length) { + if (destructedChars[i - 1] == destructedWord[j - 1]) { + dp[i][j] = dp[i - 1][j - 1] + 1 + } else { + dp[i][j] = maxOf(dp[i - 1][j], dp[i][j - 1]) + } + } + } + if (abs(dp.last().last() - destructedChars.length) <= errorCorrectionDistance && + abs(dp.last().last() - destructedWord.length) <= errorCorrectionDistance + ) { + return targetWordsMap.getValue(destructedChars) 
+ } + } + return word + } + + fun correctText(text: String): String { + return text.split(" ").joinToString(" ") { word -> correctWord(word) } + } + + private fun abs(n: Int) = if (n < 0) -n else n + + companion object { + private val INITIAL_CHARACTER = listOf( + "ㄱ", "ㄲ", "ㄴ", "ㄷ", "ㄸ", "ㄹ", "ㅁ", "ㅂ", "ㅃ", + "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅉ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ", + ) + + private val MID_CHARACTER = listOf( + "ㅏ", "ㅐ", "ㅑ", "ㅒ", "ㅓ", "ㅔ", "ㅕ", "ㅖ", "ㅗ", "ㅘ", + "ㅙ", "ㅚ", "ㅛ", "ㅜ", "ㅝ", "ㅞ", "ㅟ", "ㅠ", "ㅡ", "ㅢ", "ㅣ", + ) + + private val FINAL_CHARACTER = listOf( + "", "ㄱ", "ㄲ", "ㄳ", "ㄴ", "ㄵ", "ㄶ", "ㄷ", "ㄹ", "ㄺ", "ㄻ", "ㄼ", + "ㄽ", "ㄾ", "ㄿ", "ㅀ", "ㅁ", "ㅂ", "ㅄ", "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ", + ) + } +} diff --git a/autotextcorrection/src/test/java/com/konkuk/autotextcorrection/AutoTextCorrectorUnitTest.kt b/autotextcorrection/src/test/java/com/konkuk/autotextcorrection/AutoTextCorrectorUnitTest.kt new file mode 100644 index 0000000..ac1dfc1 --- /dev/null +++ b/autotextcorrection/src/test/java/com/konkuk/autotextcorrection/AutoTextCorrectorUnitTest.kt @@ -0,0 +1,47 @@ +package com.konkuk.autotextcorrection + +import org.junit.Assert +import org.junit.Test + +class AutoTextCorrectorUnitTest { + + private val targetTextList = listOf("칼로리", "나트륨", "단백질", "탄수화물") + private val corrector = AutoTextCorrector( + targetTextList = targetTextList, + errorCorrectionDistance = 2, + ) + + @Test + fun `destruct_corrector가 string을 char로 분해`() { + Assert.assertEquals( + listOf( + "ㅋㅏㄹㄹㅗㄹㅣ", + "ㄴㅏㅌㅡㄹㅠㅁ", + "ㄷㅏㄴㅂㅐㄱㅈㅣㄹ", + "ㅌㅏㄴㅅㅜㅎㅘㅁㅜㄹ", + ), + targetTextList.map { corrector.destructWord(it) }, + ) + } + + @Test + fun `correct_corrector가 word를 수정`() { + Assert.assertEquals( + "나트륨", + corrector.correctWord("니트륨"), + ) + + Assert.assertEquals( + "탄수화물", + corrector.correctWord("탐수화믈"), + ) + } + + @Test + fun `correct_corrector가 오류를 수정`() { + Assert.assertEquals( + "나트륨 정말 맛있어,ㅎㅎ 그리고 나는 탄수화물 매우 좋아하지!! 칼로리 너무 행복", + corrector.correctText("니트륨 정말 맛있어,ㅎㅎ 그리고 나는 탐수화믈 매우 좋아하지!! 
칼로ㅏ 너무 행복"), + ) + } +} diff --git a/textautoconvert/build.gradle.kts b/textautoconvert/build.gradle.kts new file mode 100644 index 0000000..0d31ad5 --- /dev/null +++ b/textautoconvert/build.gradle.kts @@ -0,0 +1,9 @@ +plugins { + id("java-library") + id("org.jetbrains.kotlin.jvm") +} + +java { + sourceCompatibility = JavaVersion.VERSION_1_7 + targetCompatibility = JavaVersion.VERSION_1_7 +} \ No newline at end of file From b1bd753a1710ba9b03ae170e40840f2941585a7c Mon Sep 17 00:00:00 2001 From: yonghanJu Date: Tue, 14 Nov 2023 22:23:44 +0900 Subject: [PATCH 2/3] =?UTF-8?q?feat:=20AutoTextCorrector=20=EC=A0=81?= =?UTF-8?q?=EC=9A=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- feat/capture/build.gradle.kts | 1 + .../ui/enroll/EnrollTextInputViewModel.kt | 24 +++++++++++++++---- settings.gradle | 1 + 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/feat/capture/build.gradle.kts b/feat/capture/build.gradle.kts index 58534ea..e9122ff 100644 --- a/feat/capture/build.gradle.kts +++ b/feat/capture/build.gradle.kts @@ -46,6 +46,7 @@ android { dependencies { implementation(project(":common")) + implementation(project(":autotextcorrection")) // Android implementation(AndroidX.CORE_KTX) diff --git a/feat/capture/src/main/java/com/konkuk/capture/ui/enroll/EnrollTextInputViewModel.kt b/feat/capture/src/main/java/com/konkuk/capture/ui/enroll/EnrollTextInputViewModel.kt index 5a9f636..7b44878 100644 --- a/feat/capture/src/main/java/com/konkuk/capture/ui/enroll/EnrollTextInputViewModel.kt +++ b/feat/capture/src/main/java/com/konkuk/capture/ui/enroll/EnrollTextInputViewModel.kt @@ -4,6 +4,7 @@ import android.graphics.Bitmap import android.net.Uri import androidx.lifecycle.SavedStateHandle import androidx.lifecycle.ViewModel +import com.konkuk.autotextcorrection.AutoTextCorrector import com.konkuk.common.data.FoodInfo import dagger.hilt.android.lifecycle.HiltViewModel import kotlinx.coroutines.flow.MutableStateFlow @@ 
-40,11 +41,14 @@ class EnrollTextInputViewModel @Inject constructor( init { savesStateHandle.get(OCR_RESULT_KEY)?.let { text -> - val result = text.replace(" ", "") - .replace(",", ".") - .replace(")", "") + val correctedText = + AutoTextCorrector(nutritionNameList, 2).correctText(text.replace("%", "% ")) - setFoodInfo(result) + setFoodInfo( + correctedText.replace(" ", "") + .replace(",", ".") + .replace(")", ""), + ) } savesStateHandle.get(API_RESULT_KEY)?.let { setFoodInfo(it) @@ -123,6 +127,18 @@ class EnrollTextInputViewModel @Inject constructor( const val API_RESULT_KEY = "API_RESULT_KEY" const val BITMAP_PICTURE_KEY = "BITMAP_PICTURE_KEY" const val URI_PICTURE_KEY = "URI_PICTURE_KEY" + + val nutritionNameList = listOf( + "칼로리", + "탄수화물", + "단백질", + "당류", + "지방", + "포화지방", + "트랜스지방", + "콜레스트롤", + "나트륨", + ) } } diff --git a/settings.gradle b/settings.gradle index d9b802f..05e30b1 100644 --- a/settings.gradle +++ b/settings.gradle @@ -20,3 +20,4 @@ include ':feat:personal' include ':feat:history' include ':feat:capture' include ':common' +include ':autotextcorrection' From 0d904b0f380f4db9fb455c80bc652fb3515e334f Mon Sep 17 00:00:00 2001 From: yonghanJu Date: Tue, 14 Nov 2023 22:33:28 +0900 Subject: [PATCH 3/3] =?UTF-8?q?refactor:=20AutoTextCorrector=20=EC=A0=81?= =?UTF-8?q?=EC=9A=A9=20=EC=9D=B4=EC=A0=84=20=EC=BD=94=EB=93=9C=20=EC=88=98?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../capture/ui/enroll/EnrollTextInputViewModel.kt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/feat/capture/src/main/java/com/konkuk/capture/ui/enroll/EnrollTextInputViewModel.kt b/feat/capture/src/main/java/com/konkuk/capture/ui/enroll/EnrollTextInputViewModel.kt index 7b44878..501cb4f 100644 --- a/feat/capture/src/main/java/com/konkuk/capture/ui/enroll/EnrollTextInputViewModel.kt +++ b/feat/capture/src/main/java/com/konkuk/capture/ui/enroll/EnrollTextInputViewModel.kt @@ -14,15 
+14,15 @@ import javax.inject.Inject class EnrollTextInputViewModel @Inject constructor( savesStateHandle: SavedStateHandle, ) : ViewModel() { - private val sodiumRegex = Regex("나트[륨룸]([\\d.]+)\\D") + private val sodiumRegex = Regex("나트륨([\\d.]+)\\D") private val carbohydratesRegex = Regex("탄수화물([\\d.]+)\\D") private val fatRegex = Regex("지방([\\d.]+)\\D") - private val cholesterolRegex = Regex("콜[레래러]스[태테]롤([\\d.]+)\\D") + private val cholesterolRegex = Regex("콜레스테롤([\\d.]+)\\D") private val proteinRegex = Regex("단백질([\\d.]+)\\D") private val sugarRegex = Regex("당[류루]([\\d.]+)\\D") - private val transFatRegex = Regex("트[랜렌]스지방([\\d.]+)\\D") + private val transFatRegex = Regex("트랜스지방([\\d.]+)\\D") private val saturatedFatRegex = Regex("포화지방([\\d.]+)\\D") private val totalGramsRegex = Regex("총내용[량랑](\\d+)g") private val totalCaloriesRegex = Regex("(\\d+)kca") private val perCaloriesRegex = Regex("(\\d+)g당(\\d+)kca") @@ -136,7 +136,7 @@ class EnrollTextInputViewModel @Inject constructor( "지방", "포화지방", "트랜스지방", - "콜레스트롤", + "콜레스테롤", "나트륨", ) }