Merge remote-tracking branch 'origin/main'
denBruneBarone committed Dec 15, 2023
2 parents cef947f + 03f0934 commit 2964686
Showing 11 changed files with 375 additions and 106 deletions.
6 changes: 0 additions & 6 deletions Dockerfile
@@ -4,12 +4,6 @@ WORKDIR /code

 COPY . .
 
-# Install necessary build tools and dependencies
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    cmake \
-    && rm -rf /var/lib/apt/lists/*
-
 RUN pip install --no-cache-dir -r requirements_docker.txt
 
 CMD ["python", "-u", "-m", "server.server", "--host", "0.0.0.0", "--port", "4444", "--reload"]
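
(The apt-get layer removed here installed build-essential and cmake, presumably only needed to compile llama_cpp for the local Llama server; an equivalent apt-get line is added to concept_linking/tools/LlamaServer/Dockerfile below, so the toolchain now lives in that image instead of the main server image.)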
4 changes: 2 additions & 2 deletions concept_linking/requirements.txt
@@ -1,9 +1,9 @@
 # Tools
 #Requirements for LlamaServer
--r tools/LlamaServer/requirements.txt
+#-r tools/LlamaServer/requirements.txt
 
 #Requirements for OntologyGraphBuilder
--r tools/OntologyGraphBuilder/requirements.txt
+#-r tools/OntologyGraphBuilder/requirements.txt
 
 # Solutions
 #Requirements for MachineLearning
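
(pip resolves -r includes recursively, so commenting these out means a top-level pip install -r concept_linking/requirements.txt no longer pulls in the LlamaServer and OntologyGraphBuilder dependencies; anyone still running those tools locally has to install their requirements files directly.)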
9 changes: 6 additions & 3 deletions concept_linking/solutions/PromptEngineering/main.py
@@ -8,13 +8,16 @@
 from relation_extraction.knowledge_graph_messenger import KnowledgeGraphMessenger
 from concept_linking.tools.triple_helper import *
 
-# Local API url
+# Local API url python
 api_url = "http://127.0.0.1:5000/llama"
 
+# Local API url docker
+# api_url = "http://llama-cpu-server:5000/llama"
+
 # Remote API url
 # api_url = "http://knox-proxy01.srv.aau.dk/llama-api/llama"
 
-headers = {"Content-Type": "application/json"}
+headers = {"Access-Authorization": os.getenv("ACCESS_SECRET"), "Content-Type": "application/json"}
 
 PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))

@@ -181,7 +184,7 @@ def perform_entity_type_classification(post_json, output_file_path=None, output_


 if __name__ == '__main__':
-    input_file = os.path.join(PROJECT_ROOT, "data/files/EvaluationData/evaluationSet_EN.json")
+    input_file = os.path.join(PROJECT_ROOT, "data/files/EvaluationData/evaluationSet_EN_small.json")
     output_file = os.path.join(PROJECT_ROOT, "data/files/PromptEngineering/output.json")
 
     f = open(input_file, encoding="utf-8")
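
For context on the headers change above: requests to the Llama endpoint now carry an Access-Authorization token read from the ACCESS_SECRET environment variable. A minimal sketch of such a call, assuming a requests-based client; the payload field names are illustrative, not taken from this diff:

import os
import requests

api_url = "http://127.0.0.1:5000/llama"
headers = {
    # Token for the Knox proxy; os.getenv returns None when unset, and requests drops None-valued headers
    "Access-Authorization": os.getenv("ACCESS_SECRET"),
    "Content-Type": "application/json",
}

# Hypothetical payload shape -- the real prompt format is assembled in main.py
payload = {"prompt": "Classify the entity types in: Aalborg is a city.", "max_tokens": 64}

response = requests.post(api_url, json=payload, headers=headers)
response.raise_for_status()
print(response.json())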
16 changes: 10 additions & 6 deletions concept_linking/tools/LlamaServer/Dockerfile
@@ -1,22 +1,26 @@
 # Use python as base image
-FROM python
-
+FROM python:3.11-slim
 # Set the working directory in the container
 WORKDIR /app
 
 # Copy only the necessary files
 COPY llama_cpu_server.py .
 COPY requirements.txt .
 
+# Install necessary build tools and dependencies for running C++ (llama_cpp)
+# This can be removed when the app is in production and the remote llama api server is reliable and used instead of local llama
+# Install dependencies and curl
+RUN apt-get update && apt-get install -y build-essential cmake curl && rm -rf /var/lib/apt/lists/*
+
+
 # Install dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Check if the model file exists, and if not, download it using the provided function
-RUN python -c "from llama_cpu_server import download_model; download_model('llama-2-7b-chat.Q2_K.gguf', 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf?download=true')"
+# Download the model file from the URL if it doesn't exist
+RUN test -e /app/llama-2-7b-chat.Q2_K.gguf || curl -o llama-2-7b-chat.Q2_K.gguf -LJO 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf?download=true'
 
 # Expose port 5000 outside of the container
 EXPOSE 5000
 
 # Run llama_cpu_server.py when the container launches
-CMD ["python", "llama_cpu_server.py"]
+CMD ["python", "-u", "-m", "llama_cpu_server", "--host", "0.0.0.0", "--port", "5000", "--reload"]
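
The deleted RUN line imported a download_model helper from llama_cpu_server.py at image-build time; the test -e || curl line performs the same exists-or-download check in shell, without needing the Python module importable during the build. As a rough sketch, such a helper might look like this (assumed signature; the actual implementation in llama_cpu_server.py is not shown in this diff):

import os
import urllib.request

def download_model(filename, url):
    # Only fetch the multi-gigabyte model file when it is not already present
    if not os.path.exists(filename):
        urllib.request.urlretrieve(url, filename)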
12 changes: 5 additions & 7 deletions concept_linking/tools/LlamaServer/docker-compose.yml
@@ -1,11 +1,9 @@
-version: '3'
-
 services:
   llama-cpu-server:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    ports:
-      - "5000:5000"
+    build: .
+    container_name: llama-server
+    command: python -u -m llama_cpu_server --host 0.0.0.0 --port 5000 --reload
     volumes:
       - ./concept_linking/tools/LlamaServer/llama-2-7b-chat.Q2_K.gguf:/app/concept_linking/tools/LlamaServer/llama-2-7b-chat.Q2_K.gguf
+    ports:
+      - "5000:5000"
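
(One caveat worth checking: Compose resolves relative host paths in volumes against the directory containing the compose file, so ./concept_linking/tools/LlamaServer/... as written from within concept_linking/tools/LlamaServer/ would resolve to a doubled path; the mapping appears to assume the compose file is invoked from, or located at, the repository root.)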
4 changes: 1 addition & 3 deletions relation_extraction/evaluation/DanskEvaluering.xml
@@ -48,7 +48,7 @@
 <originaltripleset>
 <otriple>DanskeFartøjer | powerType | Elektrisk</otriple>
 <otriple>
-DanskeFartøjer | length | "24000"^^<http://dbpedia.org/datatype/millimetre>
+DanskeFartøjer | length | "24000"^^&lt;http://dbpedia.org/datatype/millimetre&gt;
 </otriple>
 </originaltripleset>
 <modifiedtripleset>
@@ -61,14 +61,12 @@
 <originaltripleset>
 <otriple>København_Tårn | architect | Lars Mikkelsen</otriple>
 <otriple>København_Tårn | address | "Købmagergade 52"@da</otriple>
-<otriple>København_Tårn | currentTenants | Danmarks Radio</otriple>
 <otriple>København_Tårn | location | København</otriple>
 <otriple>Danmarks Radio | country | Denmark</otriple>
 </originaltripleset>
 <modifiedtripleset>
 <mtriple>København_Tårn | architect | Lars Mikkelsen</mtriple>
 <mtriple>København_Tårn | address | "Købmagergade 52"</mtriple>
-<mtriple>København_Tårn | currentTenants | Danmarks Radio</mtriple>
 <mtriple>København_Tårn | location | København</mtriple>
 <mtriple>Danmarks Radio | country | Denmark</mtriple>
 </modifiedtripleset>
38 changes: 25 additions & 13 deletions relation_extraction/evaluation/evaluation.py
@@ -5,6 +5,7 @@
 from relation_extraction.NaiveMVP.main import parse_data
 from relation_extraction.multilingual.llm_messenger import LLMMessenger
 import re
+import copy
 import datetime
 import json

@@ -21,7 +22,7 @@ def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 3,

 def convert_testdata_to_input_format():
     objs = []
-    tree = ET.parse('relation_extraction/Evaluation/testdataMini.xml')
+    tree = ET.parse('relation_extraction/evaluation/DanskEvaluering.xml')
     root = tree.getroot()
     for entry in root.findall('.//entry'):
         sentence = entry.findall('lex')[0].text
@@ -42,7 +43,13 @@ def calculate_metrics(data):
     FP = 0
     FN = 0
 
-    for element in data["triples"]:
+    data_without_duplicates = copy.deepcopy(data)
+
+    for triples in data_without_duplicates["triples"]:
+        triples["triples_from_solution"] = set(tuple(triple) for triple in triples["triples_from_solution"])
+        triples["triples_from_solution"] = list(list(triple) for triple in triples["triples_from_solution"])
+
+    for element in data_without_duplicates["triples"]:
         TP += element["contains_hits"]
         FP += len(element["triples_from_solution"]) - element["contains_hits"]
         FN += len(element["expected_triples"]) - element["contains_hits"]
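
The set/tuple round-trip above is the usual idiom for deduplicating a list of lists, since lists are unhashable and cannot be put in a set directly; in isolation:

# Deduplicate triples by converting each list to a hashable tuple and back
triples = [["A", "rel", "B"], ["A", "rel", "B"], ["C", "rel", "D"]]
unique = list(list(t) for t in set(tuple(t) for t in triples))
# unique holds two triples; note that set iteration order is arbitrary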
@@ -99,27 +106,32 @@ def main():
             split_relations = [ontology_relations[i:i + chunk_size] for i in range(0, len(ontology_relations), chunk_size)] #Split the relations into lists of size chunk_size
             res = []
             for split_relation in split_relations:
-                res.append(solution(input_obj, split_relation, ontology_relations))
+                part_res = solution(input_obj, split_relation, ontology_relations)
+                for triple in part_res:
+                    triple[1] = triple[1].replace("http://dbpedia.org/ontology/", "")
+                res.extend(part_res)
             res_hits = 0
-            for triple in res:
+            convert_to_set_res = set(tuple(triples) for triples in res)
+            removed_duplicates_res = list(list(triples) for triples in convert_to_set_res)
+            for triple in removed_duplicates_res:
                 if triple in expected_triples:
                     res_hits += 1
+                    hits +=1
 
             evaluation_result_triples.append({"sentence":sentence, "triples_from_solution": res, "expected_triples": expected_triples, "contains_hits": res_hits})
             eta = round((((datetime.datetime.now()-dt).total_seconds()/60)/((i+1)/len(input_objs)))*(1-((i+1)/len(input_objs))),5)
             progress_suffix = f"Complete. Timeusage: {round((datetime.datetime.now()-dt).total_seconds()/60,5)} minutes. Eta {eta} minutes."
             printProgressBar(i + 1, len(input_objs), prefix = 'Progress:', suffix = progress_suffix, length = 50)
 
-    print(f"Solution {name} finished. Hit {hits}/{total_triples}. Hit percentage: {(hits/total_triples)*100}%")
-    evaluation_results[name] = {
-        "triples": evaluation_result_triples,
-        "result": {"total_expected_triples": total_triples, "hits": hits, "hit_percentage": hits/total_triples},
-        "score": calculate_metrics({"triples": evaluation_result_triples})
-    }
+        print(f"Solution {name} finished. Hit {hits}/{total_triples}. Hit percentage: {(hits/total_triples)*100}%")
+        evaluation_results[name] = {
+            "triples": evaluation_result_triples,
+            "result": {"total_expected_triples": total_triples, "hits": hits, "hit_percentage": hits/total_triples},
+            "score": calculate_metrics({"triples": evaluation_result_triples})
+        }
 
-    with open("relation_extraction/Evaluation/evaluation_results.json", "w") as f:
-        json.dump(evaluation_results, f, indent=4)
+        with open("relation_extraction/Evaluation/evaluation_results.json", "w") as f:
+            json.dump(evaluation_results, f, indent=4)
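
For reference, calculate_metrics presumably derives precision, recall, and F1 from the TP/FP/FN tallies built above; its body is outside this diff, but the standard computation would be:

def precision_recall_f1(TP, FP, FN):
    # Guard the denominators: a solution may return no triples at all
    precision = TP / (TP + FP) if TP + FP else 0.0
    recall = TP / (TP + FN) if TP + FN else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return {"precision": precision, "recall": recall, "f1": f1}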



(4 of the 11 changed files are not shown in this view.)