Skip to content

Commit

Permalink
Fix FTS5 error raised when the input string contains a "-" character. (#28)
Browse files Browse the repository at this point in the history
* Fix FTS5 error raised when the input string contains a "-" character.
The "-" character is part of the FTS5 query syntax, so input containing it needs to
be properly escaped by wrapping the input in double quotes: https://stackoverflow.com/q/28971633

* Add unit test for different characters in input string
  • Loading branch information
subins2000 authored Aug 20, 2022
1 parent f8240ab commit efcd7f6
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 10 deletions.
6 changes: 0 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,3 @@ It's good to install an IME to test changes you make to the library live.
* The `patterns` table in the learnings DB won't store Malayalam patterns. Instead, for each input, all possible Malayalam words are calculated (from `symbols` VARNAM_MATCH_ALL) and searched for in `words`. These are returned as suggestions. Previously, `patterns` would store every pattern to a word (English => Malayalam).

* `patterns` in govarnam is used solely for English words, e.g. `Computer => കമ്പ്യൂട്ടർ`. These English words won't work with our VST tokenizer because the words are not really transliterable in our language. It would be `kambyoottar => Computer`

### Release Process

* Update version in `constants.go`
* git tag
* make build release
41 changes: 37 additions & 4 deletions govarnam/dictionary.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,24 +124,57 @@ func (varnam *Varnam) searchDictionary(ctx context.Context, words []string, sear
case <-ctx.Done():
return results
default:
vals = append(vals, words[0])
if searchType == searchExactWords {
vals = append(vals, words[0])
} else {
// FTS5 MATCH requires strings to be wrapped in double quotes
// https://stackoverflow.com/q/28971633
// https://github.com/varnamproject/govarnam/issues/27
vals = append(vals, "\""+words[0]+"\"")
}

for i := range words {
if i == 0 {
continue
}
likes += ", (?)"
vals = append(vals, words[i])

if searchType == searchExactWords {
vals = append(vals, words[i])
} else {
vals = append(vals, "\""+words[i]+"\"")
}
}

// Thanks forpas
// CC BY-SA 4.0 licensed
// https://stackoverflow.com/q/68610241/1372424

if searchType == searchMatches {
query = "WITH cte(match) AS (VALUES (?) " + likes + ") SELECT c.match AS match, w.word AS word, MAX(w.weight), MAX(w.learned_on) FROM words_fts w INNER JOIN cte c ON w.word MATCH c.match || '*' GROUP BY c.match"
query = `
WITH cte(match) AS (VALUES (?) ` + likes + `)
SELECT
SUBSTR(c.match, 2, LENGTH(c.match) - 2) AS match, -- Result will be double quoted, remove it
w.word AS word,
MAX(w.weight),
MAX(w.learned_on)
FROM words_fts w
INNER JOIN cte c
ON w.word MATCH c.match || '*'
GROUP BY c.match
`
} else if searchType == searchStartingWith {
query = "WITH cte(match) AS (VALUES (?) " + likes + ") SELECT c.match AS match, w.* FROM words_fts w INNER JOIN cte c ON w.word MATCH c.match || '*' AND w.word != c.match ORDER BY weight DESC LIMIT ?"
query = `
WITH cte(match) AS (VALUES (?) ` + likes + `)
SELECT
SUBSTR(c.match, 2, LENGTH(c.match) - 1) AS match,
w.*
FROM words_fts w
INNER JOIN cte c
ON w.word MATCH c.match || '*'
AND w.word != c.match
ORDER BY weight DESC LIMIT ?
`
vals = append(vals, varnam.DictionarySuggestionsLimit)
} else if searchType == searchExactWords {
query = "SELECT * FROM words WHERE word IN ((?) " + likes + ")"
Expand Down
20 changes: 20 additions & 0 deletions govarnam/govarnam_ml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,26 @@ func TestMLTrain(t *testing.T) {
assertEqual(t, err.Error(), "nothing to unlearn")
}

// TestAnyCharacterInputWillWorkFine checks that transliteration still returns
// the expected word when the input mixes Latin letters with punctuation,
// symbols, a "-" and text that is already in Malayalam script.
func TestAnyCharacterInputWillWorkFine(t *testing.T) {
	varnam := getVarnamInstance("ml")

	// Teach the two Malayalam words that the first assertion expects to find
	// among the dictionary suggestions.
	for _, learned := range []string{"ഒന്നും", "പകൽ"} {
		varnam.Learn(learned, 0)
	}

	// Symbols and embedded Malayalam text should pass through untouched while
	// the Latin parts around them are transliterated.
	advanced := varnam.TransliterateAdvanced("onnum!@#$%^&*(പകൽ);'[]?.,`*/kall")
	gotSymbols := advanced.DictionarySuggestions[0].Word
	assertEqual(t, gotSymbols, "ഒന്നും!@#$%^&*(പകൽ);'[]?.,`*ഽകല്ല്")

	// A "-" in the input is the character the accompanying commit message
	// calls out as part of FTS5 syntax.
	plain := varnam.Transliterate("1-bi yil paTTikkunna kutti?!")
	gotHyphen := plain[0].Word
	assertEqual(t, gotHyphen, "1-ബി യിൽ പഠിക്കുന്ന കുട്ടി?!")
}

// TestML zero width joiner/non-joiner things
func TestMLZW(t *testing.T) {
varnam := getVarnamInstance("ml")
Expand Down

0 comments on commit efcd7f6

Please sign in to comment.