-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
147 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
// Build script for a plain JVM Kotlin library module.
plugins {
    id("java-library")
    id("org.jetbrains.kotlin.jvm")
}

java {
    // NOTE(review): Java 7 source/target is very old; recent JDKs and Kotlin Gradle plugin
    // versions may warn about or reject it — confirm the level this project actually needs.
    sourceCompatibility = JavaVersion.VERSION_1_7
    targetCompatibility = JavaVersion.VERSION_1_7
}

dependencies {
    // JUnit is a test-only dependency: declaring it as `testImplementation` keeps it off the
    // library's main compile/runtime classpath and out of what consumers inherit.
    // NOTE(review): assumes no main-source code in this module uses JUnit — confirm.
    testImplementation(UnitTest.JUNIT)
}
78 changes: 78 additions & 0 deletions
78
autotextcorrection/src/main/java/com/konkuk/autotextcorrection/AutoTextCorrector.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
package com.konkuk.autotextcorrection | ||
|
||
/**
 * Replaces misspelled words with the closest entry of a fixed list of target words.
 *
 * Words are compared after being decomposed into Hangul compatibility jamo (see [destructWord]),
 * so a typo in a single consonant or vowel of a syllable counts as one differing character
 * rather than a whole differing syllable. Similarity is measured with the length of the longest
 * common subsequence (LCS) of the two decomposed strings.
 *
 * @param targetTextList the correctly spelled words that input may be corrected to. When several
 * words decompose to the same jamo string, the last one in the list is used.
 * @property errorCorrectionDistance the maximum number of jamo that may be left unmatched by the
 * LCS on *each* side (target and input) for the input to be replaced by the target.
 */
class AutoTextCorrector(
    targetTextList: List<String>,
    val errorCorrectionDistance: Int = 1,
) {

    /**
     * Decomposed target word -> original target word, in the order the targets were supplied.
     * `associateBy` keeps the first-seen key order and lets the last duplicate win.
     */
    private val targetWordsMap: Map<String, String> =
        targetTextList.associateBy { word -> destructWord(word) }

    /**
     * Decomposes every precomposed Hangul syllable in [word] into its initial, medial and
     * (optional) final compatibility jamo.
     *
     * Only characters inside the Hangul Syllables block (U+AC00..U+D7A3) are decomposed. Every
     * other character — ASCII, standalone jamo, punctuation, surrogate halves of emoji, fullwidth
     * forms, and so on — is copied through unchanged.
     *
     * @param word the text to decompose.
     * @return [word] with each Hangul syllable replaced by two or three jamo characters.
     */
    fun destructWord(word: String): String = buildString {
        for (element in word) {
            if (element in HANGUL_SYLLABLE_FIRST..HANGUL_SYLLABLE_LAST) {
                // Syllables are laid out as ((initial * 21) + medial) * 28 + final.
                val offset = element.code - HANGUL_SYLLABLE_FIRST.code
                append(INITIAL_CHARACTER[offset / FINAL_COUNT / MID_COUNT])
                append(MID_CHARACTER[offset / FINAL_COUNT % MID_COUNT])
                append(FINAL_CHARACTER[offset % FINAL_COUNT])
            } else {
                append(element)
            }
        }
    }

    /**
     * Returns the first target word that is close enough to [word], or [word] itself when no
     * target qualifies.
     *
     * A target qualifies when, after decomposition, at most [errorCorrectionDistance] jamo of the
     * target and at most [errorCorrectionDistance] jamo of [word] are not part of their longest
     * common subsequence. Targets are tried in the order they were supplied.
     *
     * @param word a single word; an empty string is returned unchanged.
     */
    fun correctWord(word: String): String {
        if (word.isEmpty()) return word
        val destructedWord = destructWord(word)
        for ((destructedTarget, target) in targetWordsMap) {
            // The LCS can never be longer than the shorter string, so a length gap larger than
            // the allowed distance can be rejected without running the DP.
            val lengthGap = destructedTarget.length - destructedWord.length
            if (lengthGap !in -errorCorrectionDistance..errorCorrectionDistance) continue

            val common = longestCommonSubsequenceLength(destructedTarget, destructedWord)
            if (destructedTarget.length - common <= errorCorrectionDistance &&
                destructedWord.length - common <= errorCorrectionDistance
            ) {
                return target
            }
        }
        return word
    }

    /**
     * Applies [correctWord] to every space-separated token of [text] and joins the results with
     * single spaces again.
     */
    fun correctText(text: String): String =
        text.split(" ").joinToString(" ") { word -> correctWord(word) }

    /**
     * Computes the length of the longest common subsequence of [first] and [second] with the
     * classic dynamic-programming recurrence, keeping only two rows of the table.
     */
    private fun longestCommonSubsequenceLength(first: String, second: String): Int {
        var previousRow = IntArray(second.length + 1)
        var currentRow = IntArray(second.length + 1)
        for (i in 1..first.length) {
            for (j in 1..second.length) {
                currentRow[j] = if (first[i - 1] == second[j - 1]) {
                    previousRow[j - 1] + 1
                } else {
                    maxOf(previousRow[j], currentRow[j - 1])
                }
            }
            // Reuse the old row as scratch space; indices 1.. are fully overwritten next pass
            // and index 0 is never written, so it stays 0.
            val scratch = previousRow
            previousRow = currentRow
            currentRow = scratch
        }
        return previousRow[second.length]
    }

    companion object {
        /** First and last code points of the Unicode Hangul Syllables block. */
        private const val HANGUL_SYLLABLE_FIRST = '\uAC00'
        private const val HANGUL_SYLLABLE_LAST = '\uD7A3'

        /** Number of medial vowels and of final-consonant slots (including "no final"). */
        private const val MID_COUNT = 21
        private const val FINAL_COUNT = 28

        /** The 19 initial consonants, in Unicode syllable order. */
        private val INITIAL_CHARACTER = listOf(
            "ㄱ", "ㄲ", "ㄴ", "ㄷ", "ㄸ", "ㄹ", "ㅁ", "ㅂ", "ㅃ",
            "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅉ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ",
        )

        /** The 21 medial vowels, in Unicode syllable order. */
        private val MID_CHARACTER = listOf(
            "ㅏ", "ㅐ", "ㅑ", "ㅒ", "ㅓ", "ㅔ", "ㅕ", "ㅖ", "ㅗ", "ㅘ",
            "ㅙ", "ㅚ", "ㅛ", "ㅜ", "ㅝ", "ㅞ", "ㅟ", "ㅠ", "ㅡ", "ㅢ", "ㅣ",
        )

        /** The 28 final slots, in Unicode syllable order; index 0 means "no final consonant". */
        private val FINAL_CHARACTER = listOf(
            "", "ㄱ", "ㄲ", "ㄳ", "ㄴ", "ㄵ", "ㄶ", "ㄷ", "ㄹ", "ㄺ", "ㄻ", "ㄼ",
            "ㄽ", "ㄾ", "ㄿ", "ㅀ", "ㅁ", "ㅂ", "ㅄ", "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ",
        )
    }
}
47 changes: 47 additions & 0 deletions
47
autotextcorrection/src/test/java/com/konkuk/autotextcorrection/AutoTextCorrectorUnitTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
package com.konkuk.autotextcorrection | ||
|
||
import org.junit.Assert | ||
import org.junit.Test | ||
|
||
/**
 * Unit tests for [AutoTextCorrector], using a small list of nutrition-related target words and
 * an error-correction distance of 2.
 */
class AutoTextCorrectorUnitTest {

    private val targetTextList = listOf("칼로리", "나트륨", "단백질", "탄수화물")
    private val corrector = AutoTextCorrector(
        targetTextList = targetTextList,
        errorCorrectionDistance = 2,
    )

    /** `destructWord` splits every Hangul syllable of a word into its individual jamo. */
    @Test
    fun `destruct_corrector가 string을 char로 분해`() {
        // JUnit expects (expected, actual); keeping that order makes failure messages truthful.
        Assert.assertEquals(
            listOf(
                "ㅋㅏㄹㄹㅗㄹㅣ",
                "ㄴㅏㅌㅡㄹㅠㅁ",
                "ㄷㅏㄴㅂㅐㄱㅈㅣㄹ",
                "ㅌㅏㄴㅅㅜㅎㅘㅁㅜㄹ",
            ),
            targetTextList.map { corrector.destructWord(it) },
        )
    }

    /** `correctWord` maps a single misspelled word onto the matching target word. */
    @Test
    fun `correct_corrector가 word를 수정`() {
        Assert.assertEquals(
            "나트륨",
            corrector.correctWord("니트륨"),
        )

        Assert.assertEquals(
            "탄수화물",
            corrector.correctWord("탐수화믈"),
        )
    }

    /** `correctText` fixes only the misspelled target words inside a longer sentence. */
    @Test
    fun `correct_corrector가 오류를 수정`() {
        Assert.assertEquals(
            "나트륨 정말 맛있어,ㅎㅎ 그리고 나는 탄수화물 매우 좋아하지!! 칼로리 너무 행복",
            corrector.correctText("니트륨 정말 맛있어,ㅎㅎ 그리고 나는 탐수화믈 매우 좋아하지!! 칼로ㅏ 너무 행복"),
        )
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
// Build script for a plain JVM Kotlin library module: applies the Java library and
// Kotlin/JVM plugins and pins the Java language level.
plugins {
    `java-library`
    kotlin("jvm")
}

java {
    // Source and target levels are kept in lockstep.
    val javaLevel = JavaVersion.VERSION_1_7
    sourceCompatibility = javaLevel
    targetCompatibility = javaLevel
}