Skip to content

Commit

Permalink
feat: add AutoTextCorrector
Browse files Browse the repository at this point in the history
  • Loading branch information
yonghanJu committed Nov 14, 2023
1 parent 412f1dc commit e47c439
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 0 deletions.
13 changes: 13 additions & 0 deletions autotextcorrection/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Build script for the pure-JVM `autotextcorrection` library module.
plugins {
    id("java-library")
    id("org.jetbrains.kotlin.jvm")
}

java {
    // NOTE(review): Java 7 bytecode level is kept as-is; confirm the toolchain in use still accepts it.
    sourceCompatibility = JavaVersion.VERSION_1_7
    targetCompatibility = JavaVersion.VERSION_1_7
}

dependencies {
    // JUnit is only needed to compile and run the unit tests, so it is scoped to the test
    // configuration instead of leaking onto the library's runtime classpath.
    // `UnitTest.JUNIT` is declared outside this file.
    testImplementation(UnitTest.JUNIT)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package com.konkuk.autotextcorrection

/**
 * Corrects misspelled words by comparing them, jamo by jamo, against a fixed list of target words.
 *
 * Every precomposed Hangul syllable is first decomposed into its initial, medial and final jamo
 * (see [destructWord]). A word is considered a misspelling of a target when the longest common
 * subsequence (LCS) of the two decomposed strings leaves at most [errorCorrectionDistance]
 * unmatched characters on each side.
 *
 * @param targetTextList the correctly spelled words that input may be corrected to.
 * @property errorCorrectionDistance maximum number of unmatched characters tolerated in the
 * decomposed target and, independently, in the decomposed input word.
 */
class AutoTextCorrector(
    targetTextList: List<String>,
    val errorCorrectionDistance: Int = 1,
) {

    /**
     * Decomposed target -> original target. Iteration follows the order of the target list; when
     * two targets decompose to the same string the later one wins.
     */
    private val targetWordsMap: Map<String, String> =
        targetTextList.associateBy { target -> destructWord(target) }

    /**
     * Decomposes every precomposed Hangul syllable (U+AC00..U+D7A3) of [word] into its jamo.
     *
     * Characters outside that block (Latin letters, digits, punctuation, stand-alone jamo,
     * surrogate halves of emoji, ...) are copied through unchanged.
     *
     * @return the decomposed string.
     */
    fun destructWord(word: String): String = buildString {
        for (element in word) {
            val syllableIndex = element.code - HANGUL_SYLLABLE_BASE
            // The upper bound matters: code points above the syllable block would otherwise
            // produce table indices that are out of range.
            if (syllableIndex in 0 until HANGUL_SYLLABLE_COUNT) {
                append(INITIAL_CHARACTER[syllableIndex / FINAL_COUNT / MID_COUNT])
                append(MID_CHARACTER[syllableIndex / FINAL_COUNT % MID_COUNT])
                append(FINAL_CHARACTER[syllableIndex % FINAL_COUNT])
            } else {
                append(element)
            }
        }
    }

    /**
     * Returns the target word closest to [word], or [word] itself when no target is within
     * [errorCorrectionDistance].
     *
     * Among several targets within tolerance the one with the fewest unmatched characters in
     * total is chosen, so an input that already equals a target is never rewritten into a
     * different one. Ties are resolved in favour of the target listed first.
     */
    fun correctWord(word: String): String {
        if (word.isEmpty()) return word
        val destructedWord = destructWord(word)

        var bestMatch = word
        var bestCost = Int.MAX_VALUE
        for ((destructedTarget, target) in targetWordsMap) {
            val common = lcsLength(destructedTarget, destructedWord)
            // The LCS can never be longer than either string, so both differences are >= 0.
            val unmatchedInTarget = destructedTarget.length - common
            val unmatchedInWord = destructedWord.length - common
            if (unmatchedInTarget > errorCorrectionDistance || unmatchedInWord > errorCorrectionDistance) {
                continue
            }
            val cost = unmatchedInTarget + unmatchedInWord
            if (cost < bestCost) {
                bestCost = cost
                bestMatch = target
            }
        }
        return bestMatch
    }

    /**
     * Splits [text] on single spaces, corrects each piece with [correctWord] and joins the
     * pieces back together with single spaces.
     */
    fun correctText(text: String): String =
        text.split(" ").joinToString(" ") { word -> correctWord(word) }

    /** Length of the longest common subsequence of [first] and [second], using two rolling rows. */
    private fun lcsLength(first: String, second: String): Int {
        var previousRow = IntArray(second.length + 1)
        var currentRow = IntArray(second.length + 1)
        for (i in 1..first.length) {
            for (j in 1..second.length) {
                currentRow[j] = if (first[i - 1] == second[j - 1]) {
                    previousRow[j - 1] + 1
                } else {
                    maxOf(previousRow[j], currentRow[j - 1])
                }
            }
            // Reuse the stale row as the next scratch row; index 0 stays 0 in both rows.
            val finishedRow = currentRow
            currentRow = previousRow
            previousRow = finishedRow
        }
        return previousRow[second.length]
    }

    companion object {
        /** Code point of the first precomposed Hangul syllable. */
        private const val HANGUL_SYLLABLE_BASE = 0xAC00
        private const val INITIAL_COUNT = 19
        private const val MID_COUNT = 21
        private const val FINAL_COUNT = 28

        /** Number of precomposed syllables, i.e. U+AC00..U+D7A3. */
        private const val HANGUL_SYLLABLE_COUNT = INITIAL_COUNT * MID_COUNT * FINAL_COUNT

        // Initial consonants, in syllable-block order.
        private val INITIAL_CHARACTER = listOf(
            "ㄱ", "ㄲ", "ㄴ", "ㄷ", "ㄸ", "ㄹ", "ㅁ", "ㅂ", "ㅃ",
            "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅉ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ",
        )

        // Medial vowels, in syllable-block order.
        private val MID_CHARACTER = listOf(
            "ㅏ", "ㅐ", "ㅑ", "ㅒ", "ㅓ", "ㅔ", "ㅕ", "ㅖ", "ㅗ", "ㅘ",
            "ㅙ", "ㅚ", "ㅛ", "ㅜ", "ㅝ", "ㅞ", "ㅟ", "ㅠ", "ㅡ", "ㅢ", "ㅣ",
        )

        // Final consonants, in syllable-block order; index 0 means "no final consonant".
        private val FINAL_CHARACTER = listOf(
            "", "ㄱ", "ㄲ", "ㄳ", "ㄴ", "ㄵ", "ㄶ", "ㄷ", "ㄹ", "ㄺ", "ㄻ", "ㄼ",
            "ㄽ", "ㄾ", "ㄿ", "ㅀ", "ㅁ", "ㅂ", "ㅄ", "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ",
        )
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package com.konkuk.autotextcorrection

import org.junit.Assert
import org.junit.Test

/**
 * Unit tests for [AutoTextCorrector]: jamo decomposition, single-word correction and
 * whole-sentence correction against a small list of nutrition terms.
 */
class AutoTextCorrectorUnitTest {

    private val targetTextList = listOf("칼로리", "나트륨", "단백질", "탄수화물")
    private val corrector = AutoTextCorrector(
        targetTextList = targetTextList,
        errorCorrectionDistance = 2,
    )

    /** Each target word is decomposed into its individual jamo. */
    @Test
    fun `destruct_corrector가 string을 char로 분해`() {
        // JUnit's contract is assertEquals(expected, actual); keeping that order makes
        // failure messages report the right side as "expected".
        Assert.assertEquals(
            listOf(
                "ㅋㅏㄹㄹㅗㄹㅣ",
                "ㄴㅏㅌㅡㄹㅠㅁ",
                "ㄷㅏㄴㅂㅐㄱㅈㅣㄹ",
                "ㅌㅏㄴㅅㅜㅎㅘㅁㅜㄹ",
            ),
            targetTextList.map { corrector.destructWord(it) },
        )
    }

    /** A single misspelled word is replaced by the matching target word. */
    @Test
    fun `correct_corrector가 word를 수정`() {
        Assert.assertEquals(
            "나트륨",
            corrector.correctWord("니트륨"),
        )

        Assert.assertEquals(
            "탄수화물",
            corrector.correctWord("탐수화믈"),
        )
    }

    /** Misspelled words inside a sentence are corrected; all other words are left untouched. */
    @Test
    fun `correct_corrector가 오류를 수정`() {
        Assert.assertEquals(
            "나트륨 정말 맛있어,ㅎㅎ 그리고 나는 탄수화물 매우 좋아하지!! 칼로리 너무 행복",
            corrector.correctText("니트륨 정말 맛있어,ㅎㅎ 그리고 나는 탐수화믈 매우 좋아하지!! 칼로ㅏ 너무 행복"),
        )
    }
}
9 changes: 9 additions & 0 deletions textautoconvert/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Build script for the pure-JVM `textautoconvert` library module.
plugins {
    `java-library`
    kotlin("jvm")
}

// Source and target bytecode levels are kept in lockstep.
val javaLevel = JavaVersion.VERSION_1_7

java {
    sourceCompatibility = javaLevel
    targetCompatibility = javaLevel
}

0 comments on commit e47c439

Please sign in to comment.