From a5a5babb5b2fda5c58ef22293b4226c84a004bae Mon Sep 17 00:00:00 2001 From: Jaromil Date: Thu, 30 Nov 2023 05:51:42 +0100 Subject: [PATCH 1/4] new zencode_scenarios() lua function for introspection returns a table of scenario names that one can use in load_scenario() --- build/embed-lualibs | 18 +++++++++++++----- src/zen_parse.c | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/build/embed-lualibs b/build/embed-lualibs index 634fdcd60..040f92508 100755 --- a/build/embed-lualibs +++ b/build/embed-lualibs @@ -34,8 +34,9 @@ libs=$(find src/lua -type f -name '*.lua') # emscripten doesn't supports embedding this way # will use --preload-file instead - zen_extensions="" +scenarios="" + [ -r luac45 ] || { curl -Ls -o luac54 https://github.com/dyne/luabinaries/releases/latest/download/luac54 chmod +x luac54 @@ -63,16 +64,23 @@ for i in ${libs}; do echo "" >> ${dst} cd - > /dev/null rm -rf $tmp - ext_nes="{\"${n}\", &${n}_len, (const char *)${n}}," - ext_es="{\"${n}\", &fakelen, \"/$p\"}," + ext_nes="\t{\"${n}\", &${n}_len, (const char *)${n}}," + ext_es="\t{\"${n}\", &fakelen, \"/$p\"}," zen_extensions="${zen_extensions}\n#ifndef __EMSCRIPTEN__\n${ext_nes}\n#else\n${ext_es}\n#endif" + + [ "$(echo "$n" | cut -d'_' -f1)" = "zencode" ] && { + scenarios="${scenarios}\n\t\"$(echo "$n" | cut -d'_' -f2-)\"," + } done cat <> ${dst} #endif // __EMSCRIPTEN__ -zen_extension_t zen_extensions[] = { - $(printf "%b" "$zen_extensions") +zen_extension_t zen_extensions[] = {$(printf "%b" "$zen_extensions") { NULL, NULL, NULL } }; + +const char* const zen_scenarios[] = {$(printf "%b" "$scenarios") + NULL +}; EOF diff --git a/src/zen_parse.c b/src/zen_parse.c index 607f6085d..cdbe9ff50 100644 --- a/src/zen_parse.c +++ b/src/zen_parse.c @@ -262,6 +262,19 @@ static int lua_strtok(lua_State* L) { } #endif +// list scenarios embedded at build time in lualibs_detected.c +extern const char* const zen_scenarios[]; +static int lua_list_scenarios(lua_State* L) { + lua_newtable(L); + register int i; + for(i=0; zen_scenarios[i] != NULL; i++) { + lua_pushnumber(L, i + 1); // Lua arrays are 1-indexed + lua_pushstring(L, zen_scenarios[i]); + lua_settable(L, -3); + } + return 1; +} + void zen_add_parse(lua_State *L) { // override print() and io.write() static const struct luaL_Reg custom_parser [] = @@ -270,6 +283,7 @@ void zen_add_parse(lua_State *L) { {"trim", lua_trim_spaces}, {"trimq", lua_trim_quotes}, {"jsontok", lua_unserialize_json}, + {"zencode_scenarios", lua_list_scenarios}, {NULL, NULL} }; lua_getglobal(L, "_G"); luaL_setfuncs(L, custom_parser, 0); // for Lua versions 5.2 or greater From eafc294def5831a824f9ccd4075c093ba4ed3c88 Mon Sep 17 00:00:00 2001 From: Jaromil Date: Thu, 30 Nov 2023 11:40:52 +0100 Subject: [PATCH 2/4] lua test for introspection and zencode language stats --- test/lua/introspection.lua | 76 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 test/lua/introspection.lua diff --git a/test/lua/introspection.lua b/test/lua/introspection.lua new file mode 100644 index 000000000..4badf5171 --- /dev/null +++ b/test/lua/introspection.lua @@ -0,0 +1,76 @@ + +scenarios = I.spy( zencode_scenarios() ) + +print'' +print( 'Introspection found '.. #scenarios..' scenarios') +print'' + +local before +local after +for _,v in ipairs(scenarios) do + -- before = os.time() + + -- exceptions: data, zencode + if v ~= 'zencode' and v ~= 'data' then + print ('+ load: '..v) + load_scenario('zencode_'..v) + end + -- after = os.time() + -- print (' seconds: '..after-before) +end + +print'' +print( 'All scenarios are loaded now') +print'' + +-- total_statements = ( 0 +-- + table_size(ZEN.when_steps) +-- + table_size(ZEN.given_steps) +-- + table_size(ZEN.then_steps) +-- + table_size(ZEN.foreach_steps) ) +-- print( 'Total Zencode statements: '..total_statements) + +statements = { } +for k,v in pairs(ZEN.when_steps) do table.insert(statements, k) end +for k,v in pairs(ZEN.given_steps) do table.insert(statements, k) end +for k,v in pairs(ZEN.then_steps) do table.insert(statements, k) end +for k,v in pairs(ZEN.foreach_steps) do table.insert(statements, k) end + +tokens = { } +for _,v in ipairs(statements) do + local toks = strtok(trim(v):lower(), ' ') + for _,t in ipairs(toks) do + if t ~= "''" then + if tokens[t] then + tokens[t] = tokens[t] + 1 + else + tokens[t] = 1 + end + end + end +end + +print( 'Hall of fame:') +local function sortbyval(tbl, sortFunction) + local keys = {} + for key in pairs(tbl) do + table.insert(keys, key) + end + + table.sort(keys, function(a, b) + return sortFunction(tbl[a], tbl[b]) + end) + + return keys +end + +local sorted_tokens = sortbyval(tokens, function(a, b) return a < b end) + +for _,v in ipairs(sorted_tokens) do + print(tokens[v]..'\t'..v) +end + +print'' +print( 'Total Zencode statements: '..#statements) +print( 'Total unique word tokens: '..table_size(tokens)) +print'' From cbaee8c12f40cd26364d6307f75909b57d96cf3b Mon Sep 17 00:00:00 2001 From: Jaromil Date: Fri, 3 May 2024 00:38:47 +0200 Subject: [PATCH 3/4] feat: Norvig's spelling corrector loaded and tested --- src/lua/init.lua | 1 + src/lua/spell.lua | 105 +++++++++++++++++++++++++++++++++++++ test/lua/introspection.lua | 6 +++ 3 files changed, 112 insertions(+) create mode 100644 src/lua/spell.lua diff --git a/src/lua/init.lua b/src/lua/init.lua index 76629cd59..664c88ba8 100644 --- a/src/lua/init.lua +++ b/src/lua/init.lua @@ -67,6 +67,7 @@ TIME = require'time' INSPECT = require('inspect') QSORT = require('qsort_op') -- optimized table sort table.sort = QSORT -- override native table sort +SPELL = require('spell') JSON = require('zenroom_json') ECDH = require('zenroom_ecdh') -- ECDH public keys cannot function as ECP because of IANA 7303 diff --git a/src/lua/spell.lua b/src/lua/spell.lua new file mode 100644 index 000000000..f470d70de --- /dev/null +++ b/src/lua/spell.lua @@ -0,0 +1,105 @@ +-- Spelling Corrector. +-- +-- Copyright 2014 Francisco Zamora-Martinez +-- Copyright 2024 Jaromil (Dyne.org) +-- Adaptation of Peter Norvig python Spelling Corrector: +-- http://norvig.com/spell-correct.html +-- Open source code under MIT license: http://www.opensource.org/licenses/mit-license.php + +local yield,wrap = coroutine.yield,coroutine.wrap +local alphabet_str,alphabet = 'abcdefghijklmnopqrstuvwxyz',{} +for a in alphabet_str:gmatch(".") do alphabet[#alphabet+1] = a end +spell = {} + +local function list(w) return pairs{[w]=true} end + +function spell:max(...) + local arg,max,hyp = table.pack(...),0,nil + for w in table.unpack(arg) do + local p = self.model[w] or 1 + if p>max or ( p==max and hyp Date: Wed, 2 Oct 2024 10:25:56 +0200 Subject: [PATCH 4/4] chore: fix indentation --- src/lua/spell.lua | 144 ++++++++++++++++++++++++---------------------- src/zen_parse.c | 16 +++--- 2 files changed, 84 insertions(+), 76 deletions(-) diff --git a/src/lua/spell.lua b/src/lua/spell.lua index f470d70de..96ccdb93f 100644 --- a/src/lua/spell.lua +++ b/src/lua/spell.lua @@ -6,20 +6,22 @@ -- http://norvig.com/spell-correct.html -- Open source code under MIT license: http://www.opensource.org/licenses/mit-license.php -local yield,wrap = coroutine.yield,coroutine.wrap -local alphabet_str,alphabet = 'abcdefghijklmnopqrstuvwxyz',{} -for a in alphabet_str:gmatch(".") do alphabet[#alphabet+1] = a end +local yield, wrap = coroutine.yield, coroutine.wrap +local alphabet_str, alphabet = 'abcdefghijklmnopqrstuvwxyz', {} +for a in alphabet_str:gmatch(".") do alphabet[#alphabet + 1] = a end spell = {} -local function list(w) return pairs{[w]=true} end +local function list(w) return pairs { [w] =true } end function spell:max(...) - local arg,max,hyp = table.pack(...),0,nil - for w in table.unpack(arg) do - local p = self.model[w] or 1 - if p>max or ( p==max and hyp max or (p == max and hyp < w) then + hyp, max = w, p + end + end + return hyp end -- local function words(text) return text:lower():gmatch("[a-z]+") end @@ -31,75 +33,81 @@ end -- local function init(filename) train(words(io.open(filename):read("*a"))) end local function make_yield() - local set = {} - return function(w) - if not set[w] then - set[w] = true - yield(w) - end - end + local set = {} + return function(w) + if not set[w] then + set[w] = true + yield(w) + end + end end local function edits1(word_str, yield) - local yield = yield or make_yield() - return wrap(function() - local splits, word = {}, {} - for i=1,#word_str do - word[i],splits[i] = word_str:sub(i,i),{word_str:sub(1,i),word_str:sub(i)} - end - -- sentinels - splits[0], splits[#word_str+1] = { "", word_str }, { word_str, ""} - -- deletes - for i=1,#word_str do yield( splits[i-1][1]..splits[i+1][2] ) end - -- transposes - for i=1,#word_str-1 do - yield( splits[i-1][1]..word[i+1]..word[i]..splits[i+2][2] ) - end - -- replaces - for i=1,#word_str do - for j=1,#alphabet do - yield( splits[i-1][1]..alphabet[j]..splits[i+1][2] ) - end - end - -- inserts - for i=0,#word_str do - for j=1,#alphabet do - yield( splits[i][1]..alphabet[j]..splits[i+1][2] ) - end - end - end) + local yield = yield or make_yield() + return wrap(function() + local splits, word = {}, {} + for i = 1, #word_str do + word[i], splits[i] = word_str:sub(i, i), {word_str:sub(1, i), word_str:sub(i)} + end + -- sentinels + splits[0], splits[#word_str + 1] = {"", word_str}, {word_str, ""} + -- deletes + for i = 1, #word_str do + yield(splits[i - 1][1] .. splits[i + 1][2]) + end + -- transposes + for i = 1, #word_str - 1 do + yield(splits[i - 1][1] .. word[i + 1] .. word[i] .. splits[i + 2][2]) + end + -- replaces + for i = 1, #word_str do + for j = 1, #alphabet do + yield(splits[i - 1][1] .. alphabet[j] .. splits[i + 1][2]) + end + end + -- inserts + for i = 0, #word_str do + for j = 1, #alphabet do + yield(splits[i][1] .. alphabet[j] .. splits[i + 1][2]) + end + end + end) end function spell:known_edits2(w, set) - local yield,yield2 = make_yield(),make_yield() - return wrap(function() - for e1 in edits1(w) do - for e2 in edits1(e1,yield2) do - if self.model[e2] then yield( e2 ) end - end - end - end) + local yield, yield2 = make_yield(), make_yield() + return wrap(function() + for e1 in edits1(w) do + for e2 in edits1(e1, yield2) do + if self.model[e2] then + yield(e2) + end + end + end + end) end -function spell:known(list,aux) - return wrap(function() - for w in list,aux do - if self.model[w] then yield(w) end - end - end) +function spell:known(list, aux) + return wrap(function() + for w in list, aux do + if self.model[w] then + yield(w) + end + end + end) end function spell:correct(w) - local w = w:lower() - local result = self:max(self:known(list(w))) - or self:max(self:known(edits1(w))) - or self:max(self:known_edits2(w)) - or self:max(list(w)) - if result then - return result - else - return false,"No suggestion found for word: "..w - end + local w = w:lower() + local result = self:max(self:known(list(w))) + or self:max(self:known(edits1(w))) + or self:max(self:known_edits2(w)) + or self:max(list(w)) + if result then + return result + else + return false, "No suggestion found for word: " .. w + end end return spell diff --git a/src/zen_parse.c b/src/zen_parse.c index cdbe9ff50..0d91edda3 100644 --- a/src/zen_parse.c +++ b/src/zen_parse.c @@ -265,14 +265,14 @@ static int lua_strtok(lua_State* L) { // list scenarios embedded at build time in lualibs_detected.c extern const char* const zen_scenarios[]; static int lua_list_scenarios(lua_State* L) { - lua_newtable(L); - register int i; - for(i=0; zen_scenarios[i] != NULL; i++) { - lua_pushnumber(L, i + 1); // Lua arrays are 1-indexed - lua_pushstring(L, zen_scenarios[i]); - lua_settable(L, -3); - } - return 1; + lua_newtable(L); + register int i; + for (i = 0; zen_scenarios[i] != NULL; i++) { + lua_pushnumber(L, i + 1); // Lua arrays are 1-indexed + lua_pushstring(L, zen_scenarios[i]); + lua_settable(L, -3); + } + return 1; } void zen_add_parse(lua_State *L) {