diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index afd84386..6b1a4507 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -25,6 +25,7 @@ on:
env:
ICUDIR: ${{ github.workspace }}/thirdparty/icu
+ JSONDIR: ${{ github.workspace }}/thirdparty/json
jobs:
manylinux2010_x86_64:
@@ -50,7 +51,7 @@ jobs:
~/ccache
~/pipcache
- name: build and run C++ unit tests
- run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2010_x86_64:$MANYLINUX2010_X86_64_TAG /iknow/actions/build_manylinux.sh
+ run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2010_x86_64:$MANYLINUX2010_X86_64_TAG /iknow/actions/build_manylinux.sh
- name: upload wheel artifact
uses: actions/upload-artifact@v2
with:
@@ -95,7 +96,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y qemu-user-static binfmt-support
- name: build and run C++ unit tests
- run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_aarch64:$MANYLINUX2014_AARCH64_TAG /iknow/actions/build_manylinux.sh
+ run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_aarch64:$MANYLINUX2014_AARCH64_TAG /iknow/actions/build_manylinux.sh
- name: upload wheel artifact
uses: actions/upload-artifact@v2
with:
@@ -126,7 +127,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y qemu-user-static binfmt-support
- name: build and run C++ unit tests
- run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_ppc64le:$MANYLINUX2014_PPC64LE_TAG /iknow/actions/build_manylinux.sh
+ run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_ppc64le:$MANYLINUX2014_PPC64LE_TAG /iknow/actions/build_manylinux.sh
- name: upload wheel artifact
uses: actions/upload-artifact@v2
with:
diff --git a/Makefile b/Makefile
index 1df6ec61..7db34dd8 100644
--- a/Makefile
+++ b/Makefile
@@ -11,9 +11,11 @@ include $(ROOT_DIR)/build/make/platforms/$(PLATFORM).mak
all : engine
-test : enginetest
+test : enginetest engineloadtest
$(RUNTIMELOADPATHVAR)=$(ROOT_DIR)/kit/$(PLATFORM)/$(MODE)/bin:$(ICUDIR)/lib $(ROOT_DIR)/kit/$(PLATFORM)/$(MODE)/bin/iknowenginetest
+engineloadtest : engine base
+ $(MAKE) -f $(ROOT_DIR)/modules/iKnowExplicitTest/engineloadtest.mak
languagecompiler : base core
$(MAKE) -f $(ROOT_DIR)/modules/compiler/iKnowLanguageCompiler/languagecompiler.mak
enginetest : engine base
diff --git a/README.md b/README.md
index 88157499..e63b8d0b 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ This readme file has everything you need to get started, but make sure you click
- [From Python](#from-python)
- [From C++](#from-c)
- [From InterSystems IRIS](#from-intersystems-iris)
+  - [From Different Platforms](#from-different-platforms)
- [Understanding iKnow](#understanding-iknow)
- [Entities](#entities)
- [Attributes](#attributes)
@@ -75,6 +76,14 @@ For many years, the iKnow engine has been available as an embedded service on th
The [InterSystems IRIS Community Edition](https://docs.intersystems.com/irislatest/csp/docbook/DocBook.UI.Page.cls?KEY=ACLOUD) is available from Docker Hub free of charge.
+## From Different Platforms
+
+Since version 1.3, a C interface is available that communicates with the iKnow engine through JSON-encoded requests and responses:
+```cpp
+const char* j_response;
+iknow_json(R"({"method" : "index", "language" : "en", "text_source" : "Hello World"})", &j_response);
+```
+Most of the API functionality is available through this serialized JSON format.
# Understanding iKnow
diff --git a/actions/build_manylinux.sh b/actions/build_manylinux.sh
index 3c42a5ce..3161daef 100755
--- a/actions/build_manylinux.sh
+++ b/actions/build_manylinux.sh
@@ -10,6 +10,7 @@
# - CCACHE_MAXSIZE is the size limit for files held with ccache
# - PIP_CACHE_DIR is the location that pip caches files
# - ICU_URL is the URL to a .zip source release of ICU
+# - JSON_URL is the git URL of the JSON for Modern C++ (nlohmann/json) repository on GitHub
set -euxo pipefail
@@ -58,6 +59,17 @@ if ! [ -f "$ICUDIR/iknow_icu_url.txt" ] || [ $(cat "$ICUDIR/iknow_icu_url.txt")
echo "$ICU_URL" > "$ICUDIR/iknow_icu_url.txt"
fi
+##### Build JSON C++
+export JSONDIR=/iknow/thirdparty/json
+if ! [ -f "$JSONDIR/iknow_json_url.txt" ] || [ $(cat "$JSONDIR/iknow_json_url.txt") != "$JSON_URL" ]; then
+ rm -rf "$JSONDIR"
+ cd /iknow/thirdparty
+ git clone "$JSON_URL"
+ cd json
+ git checkout v3.10.2
+ echo "$JSON_URL" > "$JSONDIR/iknow_json_url.txt"
+fi
+export JSON_INCLUDE=$JSONDIR/single_include
##### Build iKnow engine and run C++ unit tests #####
cd /iknow
diff --git a/actions/build_osx.sh b/actions/build_osx.sh
index 271759af..a878fccb 100755
--- a/actions/build_osx.sh
+++ b/actions/build_osx.sh
@@ -9,6 +9,8 @@
# Required Environment Variables:
# - ICU_URL is the URL to a .zip source release of ICU
# - ICUDIR is the directory to install ICU
+# - JSON_URL is the git URL of the JSON for Modern C++ (nlohmann/json) repository on GitHub
+# - JSONDIR is the directory to install the JSON header
# - MACOSX_DEPLOYMENT_TARGET is the minimum supported Mac OS X version
set -euxo pipefail
@@ -29,6 +31,17 @@ if ! [ -f "$ICUDIR/iknow_icu_url.txt" ] || [ $(cat "$ICUDIR/iknow_icu_url.txt")
echo "$ICU_URL" > "$ICUDIR/iknow_icu_url.txt"
fi
+##### Build JSON C++
+if ! [ -f "$JSONDIR/iknow_json_url.txt" ] || [ $(cat "$JSONDIR/iknow_json_url.txt") != "$JSON_URL" ]; then
+ rm -rf "$JSONDIR"
+ cd "$GITHUB_WORKSPACE/thirdparty"
+ git clone "$JSON_URL"
+ cd json
+ git checkout v3.10.2
+ echo "$JSON_URL" > "$JSONDIR/iknow_json_url.txt"
+fi
+export JSON_INCLUDE="$JSONDIR/single_include"
+
##### Build iKnow engine and run C++ unit tests #####
export IKNOWPLAT=macx64
diff --git a/actions/build_windows.sh b/actions/build_windows.sh
index e5fc6d8e..98962288 100755
--- a/actions/build_windows.sh
+++ b/actions/build_windows.sh
@@ -8,6 +8,8 @@
# Required Environment Variables:
# - ICU_URL is the URL to a .zip pre-built release of ICU for Windows x86_64
# - ICUDIR is the directory to install ICU
+# - JSON_URL is the git URL of the JSON for Modern C++ (nlohmann/json) repository on GitHub
+# - JSONDIR is the directory to install the JSON header
# - BUILDCACHE_DIR is the directory where buildcache stores its cache
# - PYINSTALL_DIR is the directory where Python instances are installed
@@ -23,9 +25,19 @@ if ! [ -f "$ICUDIR/iknow_icu_url.txt" ] || [ $(cat "$ICUDIR/iknow_icu_url.txt")
echo "$ICU_URL" > "$ICUDIR/iknow_icu_url.txt"
fi
+##### Build JSON C++
+if ! [ -f "$JSONDIR/iknow_json_url.txt" ] || [ $(cat "$JSONDIR/iknow_json_url.txt") != "$JSON_URL" ]; then
+ rm -rf "$JSONDIR"
+ cd "$GITHUB_WORKSPACE/thirdparty"
+ git clone "$JSON_URL"
+ cd json
+ git checkout v3.10.2
+ echo "$JSON_URL" > "$JSONDIR/iknow_json_url.txt"
+fi
+export JSON_INCLUDE="$JSONDIR/single_include"
##### Build iKnow engine and run C++ unit tests #####
-cd modules
+cd "$GITHUB_WORKSPACE/modules"
MSBUILD_PATH="/c/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/MSBuild/Current/Bin"
BUILDCACHE_IMPERSONATE=cl.exe PATH="$MSBUILD_PATH:$PATH" \
MSBuild.exe iKnowEngine.sln -p:Configuration=Release -p:Platform=x64 \
diff --git a/actions/dependencies.sh b/actions/dependencies.sh
index ba0464d8..7852523d 100755
--- a/actions/dependencies.sh
+++ b/actions/dependencies.sh
@@ -12,6 +12,7 @@ set -euxo pipefail
ICU_NAME="ICU 69.1"
ICU_URL_WIN=https://github.com/unicode-org/icu/releases/download/release-69-1/icu4c-69_1-Win64-MSVC2019.zip
ICU_URL_SRC=https://github.com/unicode-org/icu/releases/download/release-69-1/icu4c-69_1-src.zip
+JSON_URL=https://github.com/nlohmann/json.git
PYVERSIONS_WIN="3.6.8 3.7.9 3.8.10 3.9.7"
PYVERSIONS_OSX="3.6.14 3.7.12 3.8.12 3.9.7"
BUILDCACHE_NAME="Release v0.27.1"
@@ -32,6 +33,8 @@ else
echo "ICU_URL=$ICU_URL_SRC" >> $GITHUB_ENV
fi
+echo "JSON_URL=$JSON_URL" >> $GITHUB_ENV
+
# Python versions
# Availability of certain versions can differ between NuGet and pyenv.
if [ "$RUNNER_OS" = Windows ]; then
diff --git a/modules/Dependencies.props b/modules/Dependencies.props
index 1d233440..81f87a20 100644
--- a/modules/Dependencies.props
+++ b/modules/Dependencies.props
@@ -5,6 +5,7 @@
$(SolutionDir)..\thirdparty\icu\
$(ICUDIR)\include
$(ICUDIR)\lib64
+ $(SolutionDir)..\thirdparty\json\single_include\
@@ -14,10 +15,13 @@
$(ICU_LIB)
+
+ $(JSON_INCLUDE)
+
- $(ICU_INCLUDE)
+ $(ICU_INCLUDE);$(JSON_INCLUDE)
$(ICU_LIB)
diff --git a/modules/engine/engine.mak b/modules/engine/engine.mak
index f8887093..149dad8f 100644
--- a/modules/engine/engine.mak
+++ b/modules/engine/engine.mak
@@ -8,7 +8,7 @@ CPP_LANGUAGE = 14
INCLUDEDIRS = $(ROOT_DIR)/modules/engine/src $(ROOT_DIR)/modules/shell/src/SDK/headers $(ROOT_DIR)/modules/shell/src $(ROOT_DIR)/modules/ali $(ROOT_DIR)/modules/core/src/headers $(ROOT_DIR)/modules/base/src/headers $(ROOT_DIR)/modules/compiler/iKnowLanguageCompiler \
$(ROOT_DIR)/kernel/$(PLATFORM)/h $(ROOT_DIR)/kernel/ux/h $(ROOT_DIR)/kernel/common/h $(ROOT_DIR)/shared/System/unix $(ROOT_DIR)/shared/System \
- $(ROOT_DIR)/shared/Utility $(ICUDIR)/include
+ $(ROOT_DIR)/shared/Utility $(ICUDIR)/include $(JSON_INCLUDE)
OBJECTFLAGS += -DMACHINETYP=58
diff --git a/modules/engine/src/engine.cpp b/modules/engine/src/engine.cpp
index 5a2dac13..2c8fc41f 100644
--- a/modules/engine/src/engine.cpp
+++ b/modules/engine/src/engine.cpp
@@ -588,4 +588,147 @@ void iKnowEngine::unloadUserDictionary(void)
delete pUserDCT;
pUserDCT = NULL;
}
+}
+
+#include
+using nlohmann::json;
+using namespace iknowdata;
+
+// Index `text` in language `lang` with a fresh iKnowEngine and serialize the result into `j_response`.
+//
+// Members written to `j_response`:
+//   "sentences" : object keyed by the 1-based sentence number (as a string); every sentence holds
+//                 "entities", "attributes", "path" and "path_attributes"
+//   "proximity" : engine.m_index.proximity
+//   "traces"    : engine.m_traces
+//
+// `text` is taken by value and handed to iKnowEngine::index().
+// Always returns 0; nothing is caught here, so exceptions propagate to the caller.
+int iknow_json_index_(String text, string lang, bool b_trace, json& j_response)
+{
+    iKnowEngine engine;
+    engine.index(text, lang, b_trace);
+
+    for (size_t cnt_sentence = 1; cnt_sentence <= engine.m_index.sentences.size(); ++cnt_sentence) {
+        struct Sentence& sent = engine.m_index.sentences[cnt_sentence - 1];
+        // Resolve the per-sentence node once instead of rebuilding the key for every member.
+        json& j_sentence = j_response["sentences"][std::to_string(cnt_sentence)];
+
+        json j_array_entities = json::array();
+        for (const Entity& entity : sent.entities) {
+            j_array_entities.push_back({
+                {"type", Entity::TypeName(entity.type_)},
+                {"offset_start", entity.offset_start_},
+                {"offset_stop", entity.offset_stop_},
+                {"index", entity.index_},
+                {"dominance_value", entity.dominance_value_},
+                {"entity_id", entity.entity_id_}
+            });
+        }
+        j_sentence["entities"] = j_array_entities;
+
+        json j_array_attributes = json::array();
+        for (const Sent_Attribute& attribute : sent.sent_attributes) {
+            // Flatten the parameter pairs into this attribute's own "parameters" array.
+            // Historically, the first 2 sets are always present, even if unused (=empty), so skip empties.
+            json j_attribute_parameters = json::array();
+            for (const auto& param : attribute.parameters_) {
+                if (!param.first.empty())
+                    j_attribute_parameters.push_back(param.first);
+                if (!param.second.empty())
+                    j_attribute_parameters.push_back(param.second);
+            }
+            j_array_attributes.push_back({
+                {"type", AttributeName(attribute.type_)},
+                {"offset_start", attribute.offset_start_},
+                {"offset_stop", attribute.offset_stop_},
+                {"marker", attribute.marker_},
+                {"parameters", j_attribute_parameters},
+                {"entity_ref", attribute.entity_ref},
+                {"entity_vector", attribute.entity_vector}
+            });
+        }
+        j_sentence["attributes"] = j_array_attributes;
+
+        j_sentence["path"] = sent.path;
+
+        json j_array_path_attributes = json::array();
+        for (const Path_Attribute& attribute : sent.path_attributes) {
+            j_array_path_attributes.push_back({
+                {"type", AttributeName(attribute.type)},
+                {"position", attribute.pos},
+                {"span", attribute.span}
+            });
+        }
+        j_sentence["path_attributes"] = j_array_path_attributes;
+    }
+
+    j_response["proximity"] = engine.m_index.proximity;
+    j_response["traces"] = engine.m_traces;
+    return 0;
+}
+
+// Handle one JSON-encoded request and produce a JSON-encoded response.
+//
+// request  : NUL-terminated JSON text. It must be an object with a "method" member, one of
+//            "GetLanguagesSet", "NormalizeText", "IdentifyLanguage" or "index".
+// response : receives a pointer to the serialized reply. The text lives in a thread_local buffer,
+//            so it is overwritten by the next call on the same thread and must not be freed.
+//
+// Returns  0 on success,
+//         -1 when the request is rejected or processing failed (the reply carries an "error" member),
+//         -2 when no reply could be produced (null `response`, or the reply could not be serialized).
+// No exception leaves this function: it is reached through the extern "C" entry point.
+int iknow_json_(const char* request, const char** response)
+{
+    thread_local std::string s_response; // per-thread reply buffer : memory allocated will be freed if thread dies...
+    static const char* const k_fallback = "{\"error\": \"internal error, response could not be serialized\"}";
+
+    if (response == nullptr)
+        return -2; // nowhere to deliver a reply
+    *response = k_fallback; // the out-parameter is valid on every return path
+
+    json j_request, j_response;
+    int ret_val = 0;
+
+    if (request == nullptr) {
+        j_response["error"] = "request is a null pointer";
+        ret_val = -1;
+    }
+    else {
+        try {
+            j_request = json::parse(std::string(request)); // parse request into json object
+
+            auto it_method = j_request.find("method"); // find() yields end() for non-object documents too
+            if (it_method == j_request.end()) {
+                j_response["error"] = "\"method\" key not found in JSON string, see documentation for correct parameters !";
+                ret_val = -1;
+            }
+            else {
+                const json& method = *it_method;
+                if (method == "GetLanguagesSet") {
+                    j_response["iknow_languages"] = iKnowEngine::GetLanguagesSet();
+                }
+                // static std::string NormalizeText(const std::string & text_source, const std::string & language, bool bUserDct = false, bool bLowerCase = true, bool bStripPunct = true);
+                else if (method == "NormalizeText") {
+                    // at() reports a missing key by name instead of inserting a null member.
+                    const std::string text_source = j_request.at("text_source").get<std::string>();
+                    const std::string language = j_request.at("language").get<std::string>();
+                    const bool bUserDct = j_request.value("bUserDct", false);
+                    const bool bLowerCase = j_request.value("bLowerCase", true);
+                    const bool bStripPunct = j_request.value("bStripPunct", true);
+
+                    j_response["normalized"] = iKnowEngine::NormalizeText(text_source, language, bUserDct, bLowerCase, bStripPunct);
+                }
+                // static std::string IdentifyLanguage(const std::string& text_source, double& certainty);
+                else if (method == "IdentifyLanguage") {
+                    const std::string text_source = j_request.at("text_source").get<std::string>();
+                    double certainty = 0.0;
+                    j_response["language"] = iKnowEngine::IdentifyLanguage(text_source, certainty);
+                    j_response["certainty"] = std::to_string(certainty);
+                }
+                // void index(iknow::base::String& text_source, const std::string& language, bool b_trace=false);
+                else if (method == "index") {
+                    const std::string text_source = j_request.at("text_source").get<std::string>();
+                    const std::string language = j_request.at("language").get<std::string>();
+                    const bool b_trace = j_request.value("b_trace", false);
+
+                    iknow_json_index_(IkStringEncoding::UTF8ToBase(text_source), language, b_trace, j_response);
+                }
+                else {
+                    j_response["error"] = "unknown \"method\" value, see documentation for correct parameters !";
+                    ret_val = -1;
+                }
+            }
+        }
+        catch (const std::exception& e) { // malformed JSON, missing/mistyped member, or an engine failure
+            j_response = json::object(); // drop any partially built reply
+            j_response["error"] = e.what();
+            ret_val = -1;
+        }
+        catch (...) {
+            j_response = json::object();
+            j_response["error"] = "unknown exception while processing the request";
+            ret_val = -1;
+        }
+    }
+
+    try {
+        s_response = j_response.dump(4); // serialize and store
+        *response = s_response.c_str(); // pass read-only buffer
+    }
+    catch (...) { // dump() failed : *response still points at k_fallback
+        ret_val = -2;
+    }
+    return ret_val;
+}
+
+extern "C" {
+    // C-linkage entry point: forwards the request to iknow_json_() and returns its status code.
+    // A C++ exception must never unwind into a C caller, so anything that escapes is
+    // reported as -2 with a fixed error document.
+    int iknow_json(const char* request, const char** response) {
+        try {
+            return iknow_json_(request, response);
+        }
+        catch (...) {
+            if (response != nullptr)
+                *response = "{\"error\": \"unhandled exception in iknow_json\"}";
+            return -2;
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/engine/src/engine.h b/modules/engine/src/engine.h
index 700b6f1b..5cff1218 100644
--- a/modules/engine/src/engine.h
+++ b/modules/engine/src/engine.h
@@ -113,6 +113,17 @@ namespace iknowdata { // to bundle all generated data
std::string index_; // the normalized entity textual representation, utf8 encoded
double dominance_value_; // a dominance value for each concept in the source document is calculated, most important concepts have highest score.
size_t entity_id_; // unique concept index in the source document, if not concept, this value equals kNoConcept
+
+        // Return the printable name of an entity type; any value not listed below yields "unknown".
+        static inline std::string TypeName(eType ent_type) {
+            const char* label = "unknown";
+            if (ent_type == eType::NonRelevant)
+                label = "NonRelevant";
+            else if (ent_type == eType::Concept)
+                label = "Concept";
+            else if (ent_type == eType::Relation)
+                label = "Relation";
+            else if (ent_type == eType::PathRelevant)
+                label = "PathRelevant";
+            return label;
+        }
};
struct Sent_Attribute // sentence attribute
@@ -134,8 +145,7 @@ namespace iknowdata { // to bundle all generated data
Attribute type_;
size_t offset_start_, offset_stop_; // these refer to offsets in the text, "start" is where the textual representation starts, "stop" is where it stops.
std::string marker_; // the normalized attribute textual representation, utf8 encoded
- // std::string value_, unit_, value2_, unit2_; // optional properties for measurement attribute
- Sent_Attribute_Parameters parameters_; // variable number of paramters, for measurement, that are value/unit pairs.
+ Sent_Attribute_Parameters parameters_; // variable number of parameters, for measurement, that are value/unit pairs.
Entity_Ref entity_ref; // reference to entity vector, max number of entities in a sentence is 1028, so unsigned short should be enough
std::vector entity_vector; // EntityVector, only used in Japanese
@@ -296,4 +306,11 @@ class IKNOW_API iKnowEngine
// helper method that makes a language ready for ALI uses.
static void add_lang_for_ALI(std::string lang);
bool m_document_level_ALI;
-};
\ No newline at end of file
+};
+
+//
+// C interface talking json
+//
+// request  : JSON text with a "method" member ("GetLanguagesSet", "NormalizeText",
+//            "IdentifyLanguage" or "index") plus that method's parameters.
+// response : receives a pointer to the JSON reply. The text is held in a thread-local buffer
+//            owned by the library: do not free it, and copy it before the next call on the same thread.
+// Returns 0 on success, -1 when the request is rejected (the reply carries an "error" member),
+// -2 when no reply could be produced.
+//
+extern "C" {
+    IKNOW_API int iknow_json(const char* request, const char** response);
+}
diff --git a/modules/enginetest/enginetest.cpp b/modules/enginetest/enginetest.cpp
index 348cda97..2fa7b954 100644
--- a/modules/enginetest/enginetest.cpp
+++ b/modules/enginetest/enginetest.cpp
@@ -6,6 +6,9 @@
#include
#include