-
Notifications
You must be signed in to change notification settings - Fork 21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
word breakers + inscript learning hack #102
base: master
Are you sure you want to change the base?
Changes from all commits
4de2fdb
a431811
9cf4fb3
d01aeeb
318e6a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,13 @@ virama "~" => "്" | |
|
||
infer_dead_consonants true | ||
|
||
word_breakers "." => ".", | ||
"," => ",", | ||
"?" => "?", | ||
"!" => "!", | ||
"(" => "(", | ||
")" => ")" | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This need not be a hash. Just a simple array will do, right? Something like,
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And you don't need one in |
||
vowels "a" => "അ", | ||
[["a"], "aa", "A"] => ["ആ", "ാ"], | ||
"i" => ["ഇ", "ി"], | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,18 @@ infer_dead_consonants false | |
$zwnj = "\u{200c}" | ||
$zwj = "\u{200d}" | ||
|
||
#word_breakers are symbols that mark the end | ||
#of the word the user is typing. When a word | ||
#breaker is encountered, IBus commits the typed word | ||
#and begins a new word. | ||
|
||
word_breakers "." => ".", | ||
"," => ",", | ||
"?" => "?", | ||
"!" => "!", | ||
"(" => "(", | ||
")" => ")" | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Array would be better, same like above |
||
vowels "D" => "അ", | ||
"E" => "ആ", | ||
"F" => "ഇ", | ||
|
@@ -82,7 +93,18 @@ consonants "k" => "ക", | |
"J" => "റ", | ||
"#" => "്ര", | ||
"&" => "ക്ഷ", | ||
"=" => "ൃ" | ||
"=" => "ൃ", | ||
"ൻ" => "ൻ", | ||
"ൺ" => "ൺ", | ||
"ൽ" => "ൽ", | ||
"ൾ" => "ൾ", | ||
"ർ" => "ർ" | ||
#The above chill maps are necessary due to a bug: | ||
#inscript treats an atomic chill as a token, | ||
#but that token is not in the vst symbols table. | ||
#This somehow makes varnam assign the type '10' (VARNAM_TOKEN_OTHER) to the chill. | ||
#If a word contains tokens of type 10, it is not learned, | ||
#so the absurd, nonsensical chill maps stay here for the time being. | ||
|
||
numbers "1" => "൧", | ||
"2" => "൨", | ||
|
@@ -97,8 +119,10 @@ numbers "1" => "൧", | |
|
||
symbols "_" => "ഃ" | ||
|
||
others "]" => $zwj, | ||
"\\" => $zwnj | ||
#non-joiner "\\" => $zwnj | ||
joiner "]" => $zwj | ||
|
||
|
||
|
||
|
||
|
||
|
@@ -117,4 +141,4 @@ others "]" => $zwj, | |
|
||
|
||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -629,6 +629,52 @@ vst_add_metadata (varnam *handle, const char* key, const char* value) | |
return VARNAM_SUCCESS; | ||
} | ||
|
||
int | ||
vst_get_word_breakers(varnam *handle, strbuf *list) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, return |
||
{ | ||
int rc; | ||
sqlite3 *db; | ||
sqlite3_stmt *stmt; | ||
char *sql = "select pattern from symbols where type=?1"; | ||
|
||
db = handle->internal->db; | ||
|
||
rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL); | ||
if(rc != SQLITE_OK) | ||
{ | ||
set_last_error(handle, "Failed to prepare statement : %s", sqlite3_errmsg(db)); | ||
sqlite3_finalize(stmt); | ||
return VARNAM_ERROR; | ||
} | ||
|
||
rc = sqlite3_bind_int(stmt, 1, VARNAM_WORD_BREAKER); | ||
if(rc != SQLITE_OK) | ||
{ | ||
sqlite3_finalize(stmt); | ||
set_last_error(handle, "Could not bind : %s", sqlite3_errmsg(db)); | ||
return VARNAM_ERROR; | ||
} | ||
|
||
rc = sqlite3_step(stmt); | ||
|
||
while(rc == SQLITE_ROW) | ||
{ | ||
strbuf_add(list, sqlite3_column_text(stmt, 0)); | ||
printf("%s\n", strbuf_to_s(list)); | ||
rc = sqlite3_step(stmt); | ||
} | ||
|
||
if(rc != SQLITE_DONE) | ||
{ | ||
set_last_error(handle, "%s", sqlite3_errmsg(db)); | ||
sqlite3_finalize(stmt); | ||
return VARNAM_ERROR; | ||
} | ||
|
||
sqlite3_finalize(stmt); | ||
return VARNAM_SUCCESS; | ||
} | ||
|
||
int | ||
vst_load_scheme_details(varnam *handle, vscheme_details *output) | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -524,6 +524,31 @@ varnam_get_all_scheme_details() | |
return schemeDetails; | ||
} | ||
|
||
/*For use with ibus*/ | ||
/*To Do : Document properly*/ | ||
/*allocated - size already allocated to char *word_breakers*/ | ||
int | ||
varnam_word_breakers(varnam *handle, char *word_breakers, int allocated) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should return |
||
{ | ||
int rc; | ||
strbuf *list = get_pooled_string(handle); | ||
|
||
rc = vst_get_word_breakers(handle, list); | ||
if(rc != VARNAM_SUCCESS) | ||
{ | ||
set_last_error(handle, "Could not obtain word breakers"); | ||
return VARNAM_ERROR; | ||
} | ||
else | ||
{ | ||
if(list->length > allocated) | ||
word_breakers = (char*)realloc(word_breakers, allocated + (list->length - allocated + 1)); | ||
|
||
strcpy(word_breakers, strbuf_to_s(list)); | ||
return VARNAM_SUCCESS; | ||
} | ||
} | ||
|
||
int | ||
varnam_get_scheme_details(varnam *handle, vscheme_details **details) | ||
{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd make this method return `varray` instead of `char*`.
The `varray` should contain all the word breakers configured in the scheme file.