From 1761aa396a19ca0972ceb2858298bba5022ab1ed Mon Sep 17 00:00:00 2001 From: Navaneeth K N Date: Wed, 18 Mar 2015 17:17:07 +0530 Subject: [PATCH] Compacting the learning file after learned from a file/directory This reduces the resulting file size. closes #96 --- api.h | 14 ++++++++++++++ learn.c | 13 ++++++++++--- varnamc | 19 ++++++++++++++++--- varnamruby.rb | 1 + words-table.c | 14 ++++++-------- 5 files changed, 47 insertions(+), 14 deletions(-) diff --git a/api.h b/api.h index a18e9df..e23e052 100644 --- a/api.h +++ b/api.h @@ -264,6 +264,20 @@ varnam_learn_from_file(varnam *handle, void (*callback)(varnam *handle, const char *word, int status, void *object), void *object); +/** + * Learnings file will be compacted. Mostly this will reduce the file size + * + * handle - Valid varnam instance + * + * RETURN + * + * VARNAM_SUCCESS - Upon successful execution + * VARNAM_ERROR - Any errors occurred during compact + * + * */ +VARNAM_EXPORT extern int +varnam_compact_learnings_file(varnam *handle); + /** * Set scheme details. This will overwrite any scheme details set before * diff --git a/learn.c b/learn.c index cb2f068..4f8317d 100644 --- a/learn.c +++ b/learn.c @@ -624,14 +624,21 @@ varnam_learn_from_file(varnam *handle, varnam_log (handle, "Failed to check file integrity"); } - varnam_log (handle, "Compacting file"); - rc = vwt_compact_file (handle); - if (rc) return rc; fclose (infile); return rc; } + +int +varnam_compact_learnings_file(varnam *handle) +{ + int rc; + varnam_log (handle, "Compacting file"); + rc = vwt_compact_file (handle); + return rc; /* report the status on success as well as on failure */ +} + int varnam_train(varnam *handle, const char *pattern, const char *word) { diff --git a/varnamc b/varnamc index 09e8988..df455ae 100755 --- a/varnamc +++ b/varnamc @@ -1098,13 +1098,14 @@ def learn_from_file $failure_log.close if not $failure_log.nil? 
end -def learn_words_in_the_file(fname) +def learn_words_in_the_file(fname, compact = true) done = VarnamLibrary.varnam_learn_from_file($varnam_handle.get_pointer(0), fname, nil, LearnCallback, nil); if done != 0 error_message = VarnamLibrary.varnam_get_last_error($varnam_handle.get_pointer(0)) puts error_message exit(1) end + compact_learnings_file if compact end def learn_from_directory @@ -1121,12 +1122,24 @@ def learn_from_directory files.each_with_index do |fname, index| if not File.directory?(fname) puts "(#{index + 1}/#{files.size}) Processing #{fname}" - learn_words_in_the_file fname + learn_words_in_the_file fname, false end end + + compact_learnings_file + puts "Processed #{$learn_counter} word(s). #{$learn_passed_counter} word(s) passed. #{$learn_failed_counter} word(s) failed." puts "Failed words are logged to - #{$failure_log.path}" if $learn_failed_counter > 0 - $failure_log.close + $failure_log.close unless $failure_log.nil? +end + +def compact_learnings_file + puts "Compacting the generated file..." 
+ done = VarnamLibrary.varnam_compact_learnings_file($varnam_handle.get_pointer(0)); + if done != 0 + error_message = VarnamLibrary.varnam_get_last_error($varnam_handle.get_pointer(0)) + raise error_message + end end def train_words_in_the_file(fname) diff --git a/varnamruby.rb b/varnamruby.rb index 3245c0d..678942e 100644 --- a/varnamruby.rb +++ b/varnamruby.rb @@ -51,6 +51,7 @@ class Word < FFI::Struct attach_function :varnam_learn, [:pointer, :string], :int attach_function :varnam_train, [:pointer, :string, :string], :int attach_function :varnam_learn_from_file, [:pointer, :string, :pointer, :pointer, :pointer], :int + attach_function :varnam_compact_learnings_file, [:pointer], :int attach_function :varnam_create_token, [:pointer, :string, :string, :string, :string, :string, :int, :int, :int, :int, :int], :int attach_function :varnam_set_scheme_details, [:pointer, :pointer], :int attach_function :varnam_get_all_scheme_details, [], :pointer diff --git a/words-table.c b/words-table.c index 7716986..ae6e5f8 100644 --- a/words-table.c +++ b/words-table.c @@ -61,7 +61,7 @@ execute_sql(varnam *handle, sqlite3 *db, const char *sql) rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg); if( rc != SQLITE_OK ){ - set_last_error (handle, "Failed to write : %s", zErrMsg); + set_last_error (handle, "Failed to execute : %s", zErrMsg); sqlite3_free(zErrMsg); return VARNAM_ERROR; } @@ -114,15 +114,13 @@ vwt_turn_off_optimization_for_huge_transaction(varnam *handle) int vwt_compact_file (varnam *handle) { - /*const char *sql =*/ - /*"VACUUM;";*/ + const char *sql = + "VACUUM;"; - /*assert (handle);*/ - /*assert (v_->known_words);*/ + assert (handle); + assert (v_->known_words); - /*return execute_sql (handle, v_->known_words, sql);*/ - /* Not doing any compacting */ - return VARNAM_SUCCESS; + return execute_sql (handle, v_->known_words, sql); } int