Skip to content

Commit

Permalink
Add "C" interface function
Browse files Browse the repository at this point in the history
Implementation of Issue#14
  • Loading branch information
JosDenysGitHub committed Oct 5, 2021
1 parent 18f9099 commit a25e12a
Show file tree
Hide file tree
Showing 18 changed files with 572 additions and 12 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ on:

env:
ICUDIR: ${{ github.workspace }}/thirdparty/icu
JSONDIR: ${{ github.workspace }}/thirdparty/json

jobs:
manylinux2010_x86_64:
Expand All @@ -50,7 +51,7 @@ jobs:
~/ccache
~/pipcache
- name: build and run C++ unit tests
run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2010_x86_64:$MANYLINUX2010_X86_64_TAG /iknow/actions/build_manylinux.sh
run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2010_x86_64:$MANYLINUX2010_X86_64_TAG /iknow/actions/build_manylinux.sh
- name: upload wheel artifact
uses: actions/upload-artifact@v2
with:
Expand Down Expand Up @@ -95,7 +96,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y qemu-user-static binfmt-support
- name: build and run C++ unit tests
run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_aarch64:$MANYLINUX2014_AARCH64_TAG /iknow/actions/build_manylinux.sh
run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_aarch64:$MANYLINUX2014_AARCH64_TAG /iknow/actions/build_manylinux.sh
- name: upload wheel artifact
uses: actions/upload-artifact@v2
with:
Expand Down Expand Up @@ -126,7 +127,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y qemu-user-static binfmt-support
- name: build and run C++ unit tests
run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_ppc64le:$MANYLINUX2014_PPC64LE_TAG /iknow/actions/build_manylinux.sh
run: docker run --rm -e CCACHE_DIR=/ccache -e PIP_CACHE_DIR=/pipcache -e CCACHE_MAXSIZE=500M -e ICU_URL -e JSON_URL -e CYTHON_VERSION -v ~/ccache:/ccache -v ~/pipcache:/pipcache -v $GITHUB_WORKSPACE:/iknow quay.io/pypa/manylinux2014_ppc64le:$MANYLINUX2014_PPC64LE_TAG /iknow/actions/build_manylinux.sh
- name: upload wheel artifact
uses: actions/upload-artifact@v2
with:
Expand Down
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ include $(ROOT_DIR)/build/make/platforms/$(PLATFORM).mak

all : engine

test : enginetest
test : enginetest engineloadtest
$(RUNTIMELOADPATHVAR)=$(ROOT_DIR)/kit/$(PLATFORM)/$(MODE)/bin:$(ICUDIR)/lib $(ROOT_DIR)/kit/$(PLATFORM)/$(MODE)/bin/iknowenginetest

# Build the engine load test: needs the `engine` and `base` targets first, then
# delegates to the dedicated sub-makefile through a recursive $(MAKE) invocation.
engineloadtest : engine base
$(MAKE) -f $(ROOT_DIR)/modules/iKnowExplicitTest/engineloadtest.mak
languagecompiler : base core
$(MAKE) -f $(ROOT_DIR)/modules/compiler/iKnowLanguageCompiler/languagecompiler.mak
enginetest : engine base
Expand Down
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ This readme file has everything you need to get started, but make sure you click
- [From Python](#from-python)
- [From C++](#from-c)
- [From InterSystems IRIS](#from-intersystems-iris)
- [From Different Platforms](#from-different-platforms)
- [Understanding iKnow](#understanding-iknow)
- [Entities](#entities)
- [Attributes](#attributes)
Expand Down Expand Up @@ -75,6 +76,14 @@ For many years, the iKnow engine has been available as an embedded service on th
The [InterSystems IRIS Community Edition](https://docs.intersystems.com/irislatest/csp/docbook/DocBook.UI.Page.cls?KEY=ACLOUD) is available from Docker Hub free of charge.
## From Different Platforms
Since version 1.3, a C-interface is available, enabling communication with the iKnow engine in a JSON encoded request/response style:
```C
const char* j_response;
iknow_json(R"({"method" : "index", "language" : "en", "text_source" : "Hello World"})", &j_response);
```
Most API functionality is available in a serialized json format.

# Understanding iKnow

Expand Down
12 changes: 12 additions & 0 deletions actions/build_manylinux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# - CCACHE_MAXSIZE is the size limit for files held with ccache
# - PIP_CACHE_DIR is the location that pip caches files
# - ICU_URL is the URL to a .zip source release of ICU
# - JSON_URL is the URL of the C++ JSON project on Github

set -euxo pipefail

Expand Down Expand Up @@ -58,6 +59,17 @@ if ! [ -f "$ICUDIR/iknow_icu_url.txt" ] || [ $(cat "$ICUDIR/iknow_icu_url.txt")
echo "$ICU_URL" > "$ICUDIR/iknow_icu_url.txt"
fi

##### Fetch JSON C++ headers #####
# The checkout is cached: $JSONDIR/iknow_json_url.txt records the URL it came
# from, and the repository is fetched again only when that marker is missing
# or records a different URL.
export JSONDIR=/iknow/thirdparty/json
# The command substitution is quoted so that an empty or multi-word marker
# file compares as "different" instead of producing a `[` syntax error, which
# would be read as "up to date" and skip the fetch.
if ! [ -f "$JSONDIR/iknow_json_url.txt" ] || [ "$(cat "$JSONDIR/iknow_json_url.txt")" != "$JSON_URL" ]; then
  rm -rf "$JSONDIR"
  # Clone straight into $JSONDIR rather than relying on the repository name
  # happening to match the last path component of $JSONDIR.
  git clone "$JSON_URL" "$JSONDIR"
  # Pin the release; the subshell keeps the caller's working directory intact.
  (cd "$JSONDIR" && git checkout v3.10.2)
  echo "$JSON_URL" > "$JSONDIR/iknow_json_url.txt"
fi
# Include path handed to the build for the JSON headers.
export JSON_INCLUDE="$JSONDIR/single_include"

##### Build iKnow engine and run C++ unit tests #####
cd /iknow
Expand Down
13 changes: 13 additions & 0 deletions actions/build_osx.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
# Required Environment Variables:
# - ICU_URL is the URL to a .zip source release of ICU
# - ICUDIR is the directory to install ICU
# - JSON_URL is the URL of the C++ JSON project on Github
# - JSONDIR is the directory to install the JSON header
# - MACOSX_DEPLOYMENT_TARGET is the minimum supported Mac OS X version

set -euxo pipefail
Expand All @@ -29,6 +31,17 @@ if ! [ -f "$ICUDIR/iknow_icu_url.txt" ] || [ $(cat "$ICUDIR/iknow_icu_url.txt")
echo "$ICU_URL" > "$ICUDIR/iknow_icu_url.txt"
fi

##### Fetch JSON C++ headers #####
# The checkout is cached: $JSONDIR/iknow_json_url.txt records the URL it came
# from, and the repository is fetched again only when that marker is missing
# or records a different URL.
# The command substitution is quoted so that an empty or multi-word marker
# file compares as "different" instead of producing a `[` syntax error, which
# would be read as "up to date" and skip the fetch.
if ! [ -f "$JSONDIR/iknow_json_url.txt" ] || [ "$(cat "$JSONDIR/iknow_json_url.txt")" != "$JSON_URL" ]; then
  rm -rf "$JSONDIR"
  # Clone straight into $JSONDIR (the documented install directory) instead of
  # assuming it equals "$GITHUB_WORKSPACE/thirdparty/json".
  git clone "$JSON_URL" "$JSONDIR"
  # Pin the release; the subshell keeps the caller's working directory intact.
  (cd "$JSONDIR" && git checkout v3.10.2)
  echo "$JSON_URL" > "$JSONDIR/iknow_json_url.txt"
fi
# Include path handed to the build for the JSON headers.
export JSON_INCLUDE="$JSONDIR/single_include"


##### Build iKnow engine and run C++ unit tests #####
export IKNOWPLAT=macx64
Expand Down
14 changes: 13 additions & 1 deletion actions/build_windows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
# Required Environment Variables:
# - ICU_URL is the URL to a .zip pre-built release of ICU for Windows x86_64
# - ICUDIR is the directory to install ICU
# - JSON_URL is the URL of the C++ JSON project on Github
# - JSONDIR is the directory to install the JSON header
# - BUILDCACHE_DIR is the directory where buildcache stores its cache
# - PYINSTALL_DIR is the directory where Python instances are installed

Expand All @@ -23,9 +25,19 @@ if ! [ -f "$ICUDIR/iknow_icu_url.txt" ] || [ $(cat "$ICUDIR/iknow_icu_url.txt")
echo "$ICU_URL" > "$ICUDIR/iknow_icu_url.txt"
fi

##### Fetch JSON C++ headers #####
# The checkout is cached: $JSONDIR/iknow_json_url.txt records the URL it came
# from, and the repository is fetched again only when that marker is missing
# or records a different URL.
# The command substitution is quoted so that an empty or multi-word marker
# file compares as "different" instead of producing a `[` syntax error, which
# would be read as "up to date" and skip the fetch.
if ! [ -f "$JSONDIR/iknow_json_url.txt" ] || [ "$(cat "$JSONDIR/iknow_json_url.txt")" != "$JSON_URL" ]; then
  rm -rf "$JSONDIR"
  # Clone straight into $JSONDIR (the documented install directory) instead of
  # assuming it equals "$GITHUB_WORKSPACE/thirdparty/json".
  git clone "$JSON_URL" "$JSONDIR"
  # Pin the release; the subshell keeps the caller's working directory intact.
  (cd "$JSONDIR" && git checkout v3.10.2)
  echo "$JSON_URL" > "$JSONDIR/iknow_json_url.txt"
fi
# Include path handed to the build for the JSON headers.
export JSON_INCLUDE="$JSONDIR/single_include"

##### Build iKnow engine and run C++ unit tests #####
cd modules
cd "$GITHUB_WORKSPACE/modules"
MSBUILD_PATH="/c/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/MSBuild/Current/Bin"
BUILDCACHE_IMPERSONATE=cl.exe PATH="$MSBUILD_PATH:$PATH" \
MSBuild.exe iKnowEngine.sln -p:Configuration=Release -p:Platform=x64 \
Expand Down
3 changes: 3 additions & 0 deletions actions/dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ set -euxo pipefail
ICU_NAME="ICU 69.1"
ICU_URL_WIN=https://github.com/unicode-org/icu/releases/download/release-69-1/icu4c-69_1-Win64-MSVC2019.zip
ICU_URL_SRC=https://github.com/unicode-org/icu/releases/download/release-69-1/icu4c-69_1-src.zip
JSON_URL=https://github.com/nlohmann/json.git
PYVERSIONS_WIN="3.6.8 3.7.9 3.8.10 3.9.7"
PYVERSIONS_OSX="3.6.14 3.7.12 3.8.12 3.9.7"
BUILDCACHE_NAME="Release v0.27.1"
Expand All @@ -32,6 +33,8 @@ else
echo "ICU_URL=$ICU_URL_SRC" >> $GITHUB_ENV
fi

echo "JSON_URL=$JSON_URL" >> $GITHUB_ENV

# Python versions
# Availability of certain versions can differ between NuGet and pyenv.
if [ "$RUNNER_OS" = Windows ]; then
Expand Down
6 changes: 5 additions & 1 deletion modules/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<ICUDIR>$(SolutionDir)..\thirdparty\icu\</ICUDIR>
<ICU_INCLUDE>$(ICUDIR)\include</ICU_INCLUDE>
<ICU_LIB>$(ICUDIR)\lib64</ICU_LIB>
<JSON_INCLUDE>$(SolutionDir)..\thirdparty\json\single_include\</JSON_INCLUDE>
</PropertyGroup>
<PropertyGroup />
<ItemGroup>
Expand All @@ -14,10 +15,13 @@
<BuildMacro Include="ICU_LIB">
<Value>$(ICU_LIB)</Value>
</BuildMacro>
<BuildMacro Include="JSON_INCLUDE">
<Value>$(JSON_INCLUDE)</Value>
</BuildMacro>
</ItemGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(ICU_INCLUDE)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(ICU_INCLUDE);$(JSON_INCLUDE)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(ICU_LIB)</AdditionalLibraryDirectories>
Expand Down
2 changes: 1 addition & 1 deletion modules/engine/engine.mak
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ CPP_LANGUAGE = 14

INCLUDEDIRS = $(ROOT_DIR)/modules/engine/src $(ROOT_DIR)/modules/shell/src/SDK/headers $(ROOT_DIR)/modules/shell/src $(ROOT_DIR)/modules/ali $(ROOT_DIR)/modules/core/src/headers $(ROOT_DIR)/modules/base/src/headers $(ROOT_DIR)/modules/compiler/iKnowLanguageCompiler \
$(ROOT_DIR)/kernel/$(PLATFORM)/h $(ROOT_DIR)/kernel/ux/h $(ROOT_DIR)/kernel/common/h $(ROOT_DIR)/shared/System/unix $(ROOT_DIR)/shared/System \
$(ROOT_DIR)/shared/Utility $(ICUDIR)/include
$(ROOT_DIR)/shared/Utility $(ICUDIR)/include $(JSON_INCLUDE)

OBJECTFLAGS += -DMACHINETYP=58

Expand Down
143 changes: 143 additions & 0 deletions modules/engine/src/engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,4 +588,147 @@ void iKnowEngine::unloadUserDictionary(void)
delete pUserDCT;
pUserDCT = NULL;
}
}

#include <nlohmann/json.hpp>
using nlohmann::json;
using namespace iknowdata;

// Index `text` for language `lang` with a fresh iKnowEngine instance and
// serialize the indexing result into `j_response`.
//
// Keys written to `j_response`:
//   "sentences" : object keyed by the 1-based sentence number (as a string);
//                 every sentence holds "entities", "attributes", "path" and
//                 "path_attributes"
//   "proximity" : engine.m_index.proximity
//   "traces"    : engine.m_traces
//
// Parameters:
//   text       text to index, in the engine's internal String type
//   lang       language passed to iKnowEngine::index
//   b_trace    forwarded to iKnowEngine::index
//   j_response JSON object that receives the result
//
// Always returns 0. Nothing is caught here, so any exception thrown while
// indexing or serializing propagates to the caller.
int iknow_json_index_(String text, string lang, bool b_trace, json& j_response)
{
	iKnowEngine engine;
	engine.index(text, lang, b_trace);

	for (size_t cnt_sentence = 1; cnt_sentence <= engine.m_index.sentences.size(); ++cnt_sentence) {
		struct Sentence& sent = engine.m_index.sentences[cnt_sentence - 1];
		// One lookup per sentence instead of one per key.
		json& j_sentence = j_response["sentences"][std::to_string(cnt_sentence)];

		json j_array_entities = json::array();
		for (Entity& entity : sent.entities) {
			json j_entity = {
				{"type", Entity::TypeName(entity.type_)},
				{"offset_start", entity.offset_start_},
				{"offset_stop", entity.offset_stop_},
				{"index", entity.index_},
				{"dominance_value", entity.dominance_value_},
				{"entity_id", entity.entity_id_}
			};
			j_array_entities.push_back(j_entity);
		}
		j_sentence["entities"] = j_array_entities;

		json j_array_attributes = json::array();
		for (Sent_Attribute& attribute : sent.sent_attributes) {
			Sent_Attribute::Sent_Attribute_Parameters& params = attribute.parameters_;
			// Collect the non-empty halves of each parameter pair into this attribute's own
			// "parameters" array. They used to be appended to the sentence-level attribute
			// array by mistake, which left "parameters" permanently empty.
			json j_attribute_parameters = json::array();
			for (auto it = params.begin(); it != params.end(); ++it) { // historically, the first 2 sets are always present, even if unused (=empty)
				if (!it->first.empty())
					j_attribute_parameters.push_back(it->first);
				if (!it->second.empty())
					j_attribute_parameters.push_back(it->second);
			}
			json j_attribute = {
				{"type", AttributeName(attribute.type_)},
				{"offset_start", attribute.offset_start_},
				{"offset_stop", attribute.offset_stop_},
				{"marker", attribute.marker_},
				{"parameters", j_attribute_parameters},
				{"entity_ref", attribute.entity_ref},
				{"entity_vector", attribute.entity_vector}
			};
			j_array_attributes.push_back(j_attribute);
		}
		j_sentence["attributes"] = j_array_attributes;

		j_sentence["path"] = sent.path;

		json j_array_path_attributes = json::array();
		for (Path_Attribute& attribute : sent.path_attributes) {
			json j_path_attribute = {
				{"type", AttributeName(attribute.type)},
				{"position", attribute.pos},
				{"span", attribute.span}
			};
			j_array_path_attributes.push_back(j_path_attribute);
		}
		j_sentence["path_attributes"] = j_array_path_attributes;
	}

	j_response["proximity"] = engine.m_index.proximity;
	j_response["traces"] = engine.m_traces;
	return 0;
}

int iknow_json_(const char* request, const char** response)
{
thread_local string s_response; // thread local : memory allocated will be freed if thread dies...
json j_request, j_response;

int ret_val = 0;
try {
j_request = json::parse(string(request)); // parse request into json object
}
catch (std::exception& e) {
j_response["error"] = e.what(); // std::cout << e.what() << std::endl;
s_response = j_response.dump(); // from json string on thread local storage buffer
ret_val = -1;
}
if (ret_val == 0) { // JSON Parsing succesfull
if (j_request.find("method") != j_request.end()) {
if (j_request["method"] == string("GetLanguagesSet")) {
j_response["iknow_languages"] = iKnowEngine::GetLanguagesSet();
}
// static std::string NormalizeText(const std::string & text_source, const std::string & language, bool bUserDct = false, bool bLowerCase = true, bool bStripPunct = true);
if (j_request["method"] == string("NormalizeText")) {
string text_source = j_request["text_source"];
string language = j_request["language"];
bool bUserDct = false; // default value
if (j_request.find("bUserDct") != j_request.end())
bUserDct = j_request["bUserDct"];
bool bLowerCase = true; // default value
if (j_request.find("bLowerCase") != j_request.end())
bLowerCase = j_request["bLowerCase"];
bool bStripPunct = true; // default value
if (j_request.find("bStripPunct") != j_request.end())
bStripPunct = j_request["bStripPunct"];

j_response["normalized"] = iKnowEngine::NormalizeText(text_source, language, bUserDct, bLowerCase, bStripPunct);
}
// static std::string IdentifyLanguage(const std::string& text_source, double& certainty);
if (j_request["method"] == "IdentifyLanguage") {
string text_source = j_request["text_source"];
double certainty;
j_response["language"] = iKnowEngine::IdentifyLanguage(text_source, certainty);
j_response["certainty"] = to_string(certainty);
}
// void index(iknow::base::String& text_source, const std::string& language, bool b_trace=false);
if (j_request["method"] == "index") {
string text_source = j_request["text_source"];
string language = j_request["language"];
bool b_trace = false; // default value
if (j_request.find("b_trace") != j_request.end())
b_trace = j_request["b_trace"];

iknow_json_index_(IkStringEncoding::UTF8ToBase(text_source), language, b_trace, j_response);
}
}
else {
j_response["error"] = "\"method\" key not found in JSON string, see documentation for correct parameters !";
ret_val = -1;
}
}
try {
s_response = j_response.dump(4); // serialize and store
*response = s_response.c_str(); // pass read-only buffer
}
catch (...) {
ret_val = -2;
}
return ret_val;
}

// C-linkage entry point of the JSON interface: forwards both arguments
// unchanged to iknow_json_ and returns its result.
extern "C" {
int iknow_json(const char* request, const char** response) {
return iknow_json_(request, response);
}
}
23 changes: 20 additions & 3 deletions modules/engine/src/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,17 @@ namespace iknowdata { // to bundle all generated data
std::string index_; // the normalized entity textual representation, utf8 encoded
double dominance_value_; // a dominance value for each concept in the source document is calculated, most important concepts have highest score.
size_t entity_id_; // unique concept index in the source document, if not concept, this value equals kNoConcept

// Return the printable name of an entity type; any value that is not one of
// the four listed enumerators yields "unknown".
static inline std::string TypeName(eType ent_type) {
	if (ent_type == eType::NonRelevant)
		return "NonRelevant";
	if (ent_type == eType::Concept)
		return "Concept";
	if (ent_type == eType::Relation)
		return "Relation";
	if (ent_type == eType::PathRelevant)
		return "PathRelevant";
	return "unknown";
}
};

struct Sent_Attribute // sentence attribute
Expand All @@ -134,8 +145,7 @@ namespace iknowdata { // to bundle all generated data
Attribute type_;
size_t offset_start_, offset_stop_; // these refer to offsets in the text, "start" is where the textual representation starts, "stop" is where it stops.
std::string marker_; // the normalized attribute textual representation, utf8 encoded
// std::string value_, unit_, value2_, unit2_; // optional properties for measurement attribute
Sent_Attribute_Parameters parameters_; // variable number of paramters, for measurement, that are value/unit pairs.
Sent_Attribute_Parameters parameters_; // variable number of parameters, for measurement, that are value/unit pairs.

Entity_Ref entity_ref; // reference to entity vector, max number of entities in a sentence is 1028, so unsigned short should be enough
std::vector<Entity_Ref> entity_vector; // EntityVector, only used in Japanese
Expand Down Expand Up @@ -296,4 +306,11 @@ class IKNOW_API iKnowEngine
// helper method that makes a language ready for ALI uses.
static void add_lang_for_ALI(std::string lang);
bool m_document_level_ALI;
};
};

//
// C interface talking JSON
//
// request  : JSON encoded text; it must contain a "method" key
//            ("GetLanguagesSet", "NormalizeText", "IdentifyLanguage" or "index").
// response : receives a pointer to the JSON encoded reply. The reply is kept in a
//            thread-local buffer that is overwritten by the next call on the same
//            thread, so the caller must not free it.
// returns  : 0 on success, a negative value on failure (-1 when the request cannot
//            be parsed or lacks "method", -2 when the reply cannot be serialized).
//
extern "C" {
IKNOW_API int iknow_json(const char* request, const char** response);
}
Loading

0 comments on commit a25e12a

Please sign in to comment.