Skip to content

Commit

Permalink
Compacting the learning file after learned from a file/directory
Browse files Browse the repository at this point in the history
This reduces the resulting file size. closes varnamproject#96
  • Loading branch information
navaneeth committed Mar 18, 2015
1 parent bc13d69 commit 1761aa3
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 14 deletions.
14 changes: 14 additions & 0 deletions api.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,20 @@ varnam_learn_from_file(varnam *handle,
void (*callback)(varnam *handle, const char *word, int status, void *object),
void *object);

/**
 * Compacts the learnings file. In most cases this reduces the file size.
 *
 * handle - Valid varnam instance
 *
 * RETURN
 *
 * VARNAM_SUCCESS - Upon successful execution
 * VARNAM_ERROR   - If any error occurred during compaction
 *
 * */
VARNAM_EXPORT extern int
varnam_compact_learnings_file(varnam *handle);

/**
* Set scheme details. This will overwrite any scheme details set before
*
Expand Down
13 changes: 10 additions & 3 deletions learn.c
Original file line number Diff line number Diff line change
Expand Up @@ -624,14 +624,21 @@ varnam_learn_from_file(varnam *handle,
varnam_log (handle, "Failed to check file integrity");
}

varnam_log (handle, "Compacting file");
rc = vwt_compact_file (handle);
if (rc) return rc;

fclose (infile);
return rc;
}


/**
 * Compacts the learnings file associated with the given handle.
 *
 * handle - Valid varnam instance
 *
 * Logs the operation and delegates the actual work to vwt_compact_file().
 *
 * RETURN
 *
 * The status code produced by vwt_compact_file(): zero on success,
 * non-zero on failure.
 */
int
varnam_compact_learnings_file(varnam *handle)
{
    varnam_log (handle, "Compacting file");

    /* Return the status on every path. Previously only the failure path
     * returned a value; on success control fell off the end of this non-void
     * function, so callers that inspected the result read an indeterminate
     * value. */
    return vwt_compact_file (handle);
}

int
varnam_train(varnam *handle, const char *pattern, const char *word)
{
Expand Down
19 changes: 16 additions & 3 deletions varnamc
Original file line number Diff line number Diff line change
Expand Up @@ -1098,13 +1098,14 @@ def learn_from_file
$failure_log.close if not $failure_log.nil?
end

# Learns all the words found in the file +fname+.
#
# fname   - path of the file handed to varnam_learn_from_file
# compact - when true (the default) the learnings file is compacted right
#           after this file has been learned. Callers that process many
#           files pass false and compact once at the end instead.
#
# On failure the library's last error message is printed and the process
# exits with status 1.
def learn_words_in_the_file(fname, compact = true)
  done = VarnamLibrary.varnam_learn_from_file($varnam_handle.get_pointer(0), fname, nil, LearnCallback, nil)
  if done != 0
    error_message = VarnamLibrary.varnam_get_last_error($varnam_handle.get_pointer(0))
    puts error_message
    exit(1)
  end
  compact_learnings_file if compact
end

def learn_from_directory
Expand All @@ -1121,12 +1122,24 @@ def learn_from_directory
files.each_with_index do |fname, index|
if not File.directory?(fname)
puts "(#{index + 1}/#{files.size}) Processing #{fname}"
learn_words_in_the_file fname
learn_words_in_the_file fname, false
end
end

compact_learnings_file

puts "Processed #{$learn_counter} word(s). #{$learn_passed_counter} word(s) passed. #{$learn_failed_counter} word(s) failed."
puts "Failed words are logged to - #{$failure_log.path}" if $learn_failed_counter > 0
$failure_log.close
$failure_log.close unless $failure_log.nil?
end

# Asks the varnam library to compact the learnings file.
#
# Prints a progress line first. Raises a RuntimeError carrying the library's
# last error message when the native call reports a non-zero status.
def compact_learnings_file
  puts "Compacting the generated file..."

  handle = $varnam_handle.get_pointer(0)
  status = VarnamLibrary.varnam_compact_learnings_file(handle)
  return if status == 0

  raise VarnamLibrary.varnam_get_last_error(handle)
end

def train_words_in_the_file(fname)
Expand Down
1 change: 1 addition & 0 deletions varnamruby.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class Word < FFI::Struct
attach_function :varnam_learn, [:pointer, :string], :int
attach_function :varnam_train, [:pointer, :string, :string], :int
attach_function :varnam_learn_from_file, [:pointer, :string, :pointer, :pointer, :pointer], :int
attach_function :varnam_compact_learnings_file, [:pointer], :int
attach_function :varnam_create_token, [:pointer, :string, :string, :string, :string, :string, :int, :int, :int, :int, :int], :int
attach_function :varnam_set_scheme_details, [:pointer, :pointer], :int
attach_function :varnam_get_all_scheme_details, [], :pointer
Expand Down
14 changes: 6 additions & 8 deletions words-table.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ execute_sql(varnam *handle, sqlite3 *db, const char *sql)

rc = sqlite3_exec(db, sql, NULL, 0, &zErrMsg);
if( rc != SQLITE_OK ){
set_last_error (handle, "Failed to write : %s", zErrMsg);
set_last_error (handle, "Failed to execute : %s", zErrMsg);
sqlite3_free(zErrMsg);
return VARNAM_ERROR;
}
Expand Down Expand Up @@ -114,15 +114,13 @@ vwt_turn_off_optimization_for_huge_transaction(varnam *handle)
/**
 * Compacts the known-words database by running the SQL statement "VACUUM;"
 * on it.
 *
 * handle - Valid varnam instance whose known_words connection is open
 *          (both are asserted).
 *
 * RETURN
 *
 * The status code returned by execute_sql() for the VACUUM statement;
 * execute_sql() reports VARNAM_ERROR and sets the last error when SQLite
 * fails to run the statement.
 */
int
vwt_compact_file (varnam *handle)
{
    const char *sql = "VACUUM;";

    assert (handle);
    assert (v_->known_words);

    /* Actually run the VACUUM. The stale unconditional
     * "return VARNAM_SUCCESS" that used to precede this call made the
     * function a no-op. */
    return execute_sql (handle, v_->known_words, sql);
}

int
Expand Down

0 comments on commit 1761aa3

Please sign in to comment.