diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..184c4eb --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,11 @@ +{ + "python.testing.unittestArgs": [ + "-v", + "-s", + ".", + "-p", + "test_*.py" + ], + "python.testing.pytestEnabled": false, + "python.testing.unittestEnabled": true +} \ No newline at end of file diff --git a/relation_extraction/API_handler.py b/relation_extraction/API_handler.py new file mode 100644 index 0000000..78ec97e --- /dev/null +++ b/relation_extraction/API_handler.py @@ -0,0 +1,17 @@ + +from abc import ABCMeta, abstractmethod + +class APIHandler(metaclass=ABCMeta): + @property + @classmethod + @abstractmethod + def API_endpoint(): + """Property used to define the API_endpoint for the subclass of APIHandler""" + pass + + @classmethod + @abstractmethod + def send_request(request): + pass + + \ No newline at end of file diff --git a/relation_extraction/LessNaive/lessNaive.py b/relation_extraction/LessNaive/lessNaive.py index bc7f697..78491d0 100644 --- a/relation_extraction/LessNaive/lessNaive.py +++ b/relation_extraction/LessNaive/lessNaive.py @@ -1,3 +1,5 @@ +from relation_extraction.ontology_messenger import OntologyMessenger +from relation_extraction.knowledge_graph_messenger import KnowledgeGraphMessenger from .openie import POST_corenlp import json import sys @@ -5,8 +7,6 @@ import urllib.parse from strsimpy.normalized_levenshtein import NormalizedLevenshtein from rapidfuzz.distance import Levenshtein -from relation_extraction.output import format_output -from relation_extraction.get_relations import extract_specific_relations def find_best_ontology_match(api_relation, ontology_relations): @@ -68,11 +68,11 @@ def do_relation_extraction(data, ontology_relations): relations.extend(val["relations"]) tuples = [[r["subject"], r["relation"], r["object"]] for r in relations] - format_output(tuples) + KnowledgeGraphMessenger.format_output(tuples) return tuples def main(): - ontology_relations = extract_specific_relations() + ontology_relations = OntologyMessenger.send_request() do_relation_extraction(json.load(open("inputSentences.json")), ontology_relations) diff --git a/relation_extraction/NaiveMVP/main.py b/relation_extraction/NaiveMVP/main.py index c53756d..f412d50 100644 --- a/relation_extraction/NaiveMVP/main.py +++ b/relation_extraction/NaiveMVP/main.py @@ -1,9 +1,9 @@ import json +from relation_extraction.knowledge_graph_messenger import KnowledgeGraphMessenger import strsimpy import sys from strsimpy.normalized_levenshtein import NormalizedLevenshtein -from relation_extraction.output import send_to_database_component -from relation_extraction.get_relations import extract_specific_relations +from relation_extraction.ontology_messenger import OntologyMessenger import datetime import multiprocessing as mp from functools import partial @@ -71,7 +71,7 @@ def parse_data(data, relations): def handle_relation_post_request(data): try: - relations = extract_specific_relations() + relations = OntologyMessenger.send_request() except Exception as E: print(f"Exception during retrieval of relations: {str(E)}") raise Exception(f"Exception during retrieval of relations") @@ -83,19 +83,19 @@ def handle_relation_post_request(data): raise Exception("Incorrectly formatted input. Exception during parsing") try: - send_to_database_component(parsed_data) + KnowledgeGraphMessenger.send_request(parsed_data) except Exception as E: print(f"Exception during request to database. {str(E)}") raise Exception("Data was not sent to database due to connection error") def main(): - relations = extract_specific_relations() + relations = OntologyMessenger.send_request() # Opening JSON file with open('inputSentences.json', 'r') as f: # returns JSON object as a dictionary data = json.load(f) - send_to_database_component(parse_data(data, relations)) + KnowledgeGraphMessenger.send_request(parse_data(data, relations)) if __name__ == "__main__": main() diff --git a/relation_extraction/evaluation/evaluation.py b/relation_extraction/evaluation/evaluation.py index f45c343..52e2ec7 100644 --- a/relation_extraction/evaluation/evaluation.py +++ b/relation_extraction/evaluation/evaluation.py @@ -1,13 +1,14 @@ import sys import xml.etree.ElementTree as ET +from relation_extraction.ontology_messenger import OntologyMessenger from relation_extraction.LessNaive.lessNaive import do_relation_extraction from relation_extraction.NaiveMVP.main import parse_data import re -from relation_extraction.get_relations import extract_specific_relations import datetime import json + def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 3, length = 100, fill = '█', printEnd = "\r"): percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total))) filledLength = int(length * iteration // total) @@ -54,7 +55,7 @@ def calculate_metrics(data): def main(): input_objs = convert_testdata_to_input_format() print("testdata converted successfully") - ontology_relations = extract_specific_relations() + ontology_relations = OntologyMessenger.send_request() solutions_to_test = { diff --git a/relation_extraction/get_relations.py b/relation_extraction/get_relations.py deleted file mode 100644 index 88c74ae..0000000 --- a/relation_extraction/get_relations.py +++ /dev/null @@ -1,43 +0,0 @@ -import requests -import re - -def extract_specific_relations_offline(): - "Function to extract relations based on the specified pattern" - ontology_file_path = "./DBpedia_Ont.ttl" - print("Extracting relations offline...") - relations = set() - with open(ontology_file_path, 'r', encoding='utf-8', errors='ignore') as file: - lines = file.readlines() - for i, line in enumerate(lines): - line = line.strip() - # Check if the line starts with a colon and the next lines contain the specified pattern - if line.startswith(":") and i+1 <= len(lines) and "a rdf:Property, owl:ObjectProperty ;" in lines[i+1]: - relation = line.split()[0] # Extracting the relation name - relation = relation[1:] # Remove colon - relations.add(relation) - - - return sorted(relations) - -def extract_specific_relations(): - "Function to extract relations based on the specified pattern" - print("Getting relations from online ontology...") - relations = [] - URL = "http://130.225.57.13/knox-api/triples" - query_string_s = 'http://dbpedia.org/ontology/' - query_string_o = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property' - PARAMS = {"g":"http://knox_ontology", "s": query_string_s, "o": query_string_o} - HEADERS = {"Access-Authorization":"internal_key"} - r = requests.get(url=URL, params=PARAMS, headers=HEADERS) - print(f"db component response: {r.text}") - - data = r.json() - - for triple in data["triples"]: - relation = re.split("http://dbpedia.org/ontology/", triple["s"]["Value"])[1] - relations.append(relation) - - return relations - -if __name__ == "__main__": - extract_specific_relations() \ No newline at end of file diff --git a/relation_extraction/knowledge_graph_messenger.py b/relation_extraction/knowledge_graph_messenger.py new file mode 100644 index 0000000..8ab5c9f --- /dev/null +++ b/relation_extraction/knowledge_graph_messenger.py @@ -0,0 +1,24 @@ +import requests + +from relation_extraction.API_handler import APIHandler + +class KnowledgeGraphMessenger(APIHandler): + def API_endpoint(): + return "http://130.225.57.13/knox-api/triples" + + def send_request(output): + HEADERS = {"Access-Authorization":"internal_key"} + PARAMS={"g": "http://knox_database"} + FORMATTED_OUTPUT = KnowledgeGraphMessenger.format_output(output) + response = requests.post(url=KnowledgeGraphMessenger.API_endpoint(), json=FORMATTED_OUTPUT, params=PARAMS, headers=HEADERS) + print(f"db component response: {response.text}") + return response.text + + @classmethod + def format_output(self, output): + formatted_output = {"triples": output} + return formatted_output + + + + diff --git a/relation_extraction/meta_data_messenger.py b/relation_extraction/meta_data_messenger.py new file mode 100644 index 0000000..94d5992 --- /dev/null +++ b/relation_extraction/meta_data_messenger.py @@ -0,0 +1,8 @@ +from relation_extraction.API_handler import APIHandler + +class MetaDataMessenger(APIHandler): + def API_endpoint(): + pass + + def send_request(request_content): + pass diff --git a/relation_extraction/ontology_messenger.py b/relation_extraction/ontology_messenger.py new file mode 100644 index 0000000..9d6e92a --- /dev/null +++ b/relation_extraction/ontology_messenger.py @@ -0,0 +1,30 @@ +import requests +import re + +from relation_extraction.API_handler import APIHandler + +class OntologyMessenger(APIHandler): + def API_endpoint(): + return "http://130.225.57.13/knox-api/triples" + + def send_request(): + "Function to extract relations based on the specified pattern" + print("Getting relations from online ontology...") + relations = [] + query_string_s = 'http://dbpedia.org/ontology/' + query_string_o = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property' + PARAMS = {"g":"http://knox_ontology", "s": query_string_s, "o": query_string_o} + HEADERS = {"Access-Authorization":"internal_key"} + r = requests.get(url=OntologyMessenger.API_endpoint(), params=PARAMS, headers=HEADERS) + print(f"db component response: {r.text}") + + data = r.json() + + for triple in data["triples"]: + relation = re.split("http://dbpedia.org/ontology/", triple["s"]["Value"])[1] + relations.append(relation) + + return relations + +if __name__ == "__main__": + OntologyMessenger.send_request() \ No newline at end of file diff --git a/relation_extraction/output.py b/relation_extraction/output.py deleted file mode 100644 index 41bc973..0000000 --- a/relation_extraction/output.py +++ /dev/null @@ -1,13 +0,0 @@ -import requests - -def format_output(output): - formatted_output = {"triples": output} - return formatted_output - -def send_to_database_component(output): - URL = "http://130.225.57.13/knox-api/triples" - HEADERS = {"Access-Authorization":"internal_key"} - PARAMS={"g": "http://knox_database"} - response = requests.post(url=URL, json=format_output(output), params=PARAMS, headers=HEADERS) - print(f"db component response: {response.text}") - return response.text \ No newline at end of file diff --git a/relation_extraction/relation_extractor.py b/relation_extraction/relation_extractor.py new file mode 100644 index 0000000..3db32ae --- /dev/null +++ b/relation_extraction/relation_extractor.py @@ -0,0 +1,7 @@ +from relation_extraction.NaiveMVP.main import handle_relation_post_request + + +class RelationExtractor(): + @classmethod + def begin_extraction(self, data): + handle_relation_post_request(data) diff --git a/server/server.py b/server/server.py index 7edfbe3..d35265d 100644 --- a/server/server.py +++ b/server/server.py @@ -2,6 +2,7 @@ import json import os from relation_extraction.NaiveMVP.main import handle_relation_post_request +from relation_extraction.relation_extractor import RelationExtractor class PreProcessingHandler(BaseHTTPRequestHandler): def do_POST(self): @@ -23,7 +24,7 @@ def do_POST(self): if self.path == '/tripleconstruction': try: - handle_relation_post_request(post_content["post_json"]) + RelationExtractor.begin_extraction(post_content["post_json"]) self.send_response(200) self.send_header('Content-type','text/html') self.end_headers() diff --git a/test/test_server/test_get_relations.py b/test/test_server/test_get_relations.py index 850bb1b..d9c3fd6 100644 --- a/test/test_server/test_get_relations.py +++ b/test/test_server/test_get_relations.py @@ -1,7 +1,7 @@ import unittest +from relation_extraction.ontology_messenger import OntologyMessenger from server.server import * from unittest.mock import patch, Mock, MagicMock, mock_open -from relation_extraction.get_relations import * class TestGetRelations(unittest.TestCase): @@ -18,7 +18,7 @@ def test_extract_specific_relations(self, mock_get): mock_get.return_value.json.return_value = response mock_get.return_value.text.return_value = "request response" - relations = extract_specific_relations() + relations = OntologyMessenger.send_request() self.assertEqual(len(relations), 2) self.assertEqual(relations[0], "test") @@ -26,14 +26,6 @@ def test_extract_specific_relations(self, mock_get): mock_get.assert_called_once_with(url='http://130.225.57.13/knox-api/triples', params={'g': 'http://knox_ontology', 's': 'http://dbpedia.org/ontology/', 'o': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property'}, headers={'Access-Authorization': 'internal_key'}) - @patch("builtins.open", new_callable=mock_open, read_data=":testline\na rdf:Property, owl:ObjectProperty ;") - def test_extract_specific_relations_offline(self, mock_open): - res = extract_specific_relations_offline() - mock_open.assert_called_once() - - self.assertEqual(res, ["testline"]) - - if __name__ == "__main__": unittest.main() \ No newline at end of file diff --git a/test/test_server/test_output.py b/test/test_server/test_output.py index ca0d6cc..ed4aad6 100644 --- a/test/test_server/test_output.py +++ b/test/test_server/test_output.py @@ -1,12 +1,12 @@ import unittest -from relation_extraction.output import * +from relation_extraction.knowledge_graph_messenger import KnowledgeGraphMessenger from unittest.mock import patch, Mock, MagicMock class TestOutput(unittest.TestCase): def test_format_output(self): input = [["this", "is", "triples"]] - res = format_output(input) + res = KnowledgeGraphMessenger.format_output(input) self.assertTrue("triples" in res.keys()) self.assertEqual(res["triples"], input) @@ -15,7 +15,7 @@ def test_send_to_database(self, mock_request): mock_response = MagicMock() mock_response.text = "response" mock_request.return_value = mock_response - res = send_to_database_component("test_output") + res = KnowledgeGraphMessenger.send_request("test_output") mock_request.assert_called_once_with(url='http://130.225.57.13/knox-api/triples', json={'triples': 'test_output'}, params={'g': 'http://knox_database'}, headers={'Access-Authorization': 'internal_key'}) self.assertEqual(res, "response") \ No newline at end of file diff --git a/test/test_server/test_pre_processing_handler.py b/test/test_server/test_pre_processing_handler.py index 8d89e54..2faefee 100644 --- a/test/test_server/test_pre_processing_handler.py +++ b/test/test_server/test_pre_processing_handler.py @@ -5,7 +5,7 @@ class TestPreProcessingHandler(unittest.TestCase): @patch("os.getenv") - @patch('server.server.handle_relation_post_request', return_value=Mock()) + @patch('relation_extraction.relation_extractor.RelationExtractor.begin_extraction', return_value=Mock()) @patch.object(PreProcessingHandler, 'wrongly_formatted_request_response') @patch.object(PreProcessingHandler, 'handled_request_body', return_value=True) @patch.object(PreProcessingHandler, '__init__', return_value=None) @@ -57,7 +57,7 @@ def test_do_post_invalid_endpoint(self, mock_init, mock_handled_body, mock_wrong handler.end_headers.assert_called_once() @patch("os.getenv") - @patch('server.server.handle_relation_post_request', return_value=Mock()) + @patch('relation_extraction.relation_extractor.RelationExtractor.begin_extraction', return_value=Mock()) @patch.object(PreProcessingHandler, 'wrongly_formatted_request_response') @patch.object(PreProcessingHandler, 'handled_request_body', return_value=True) @patch.object(PreProcessingHandler, '__init__', return_value=None) diff --git a/test/test_server/test_relation_extraction.py b/test/test_server/test_relation_extraction.py index f1f37bb..9356917 100644 --- a/test/test_server/test_relation_extraction.py +++ b/test/test_server/test_relation_extraction.py @@ -4,7 +4,7 @@ class TestHandleRelationPostRequest(unittest.TestCase): - @mock.patch('relation_extraction.NaiveMVP.main.extract_specific_relations') + @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') def test_handle_post_request_raises_exception_if_relations_fail(self, mock_extract_specific_relations): mock_extract_specific_relations.side_effect = Exception() data = dict() @@ -15,7 +15,7 @@ def test_handle_post_request_raises_exception_if_relations_fail(self, mock_extra @mock.patch('relation_extraction.NaiveMVP.main.parse_data') - @mock.patch('relation_extraction.NaiveMVP.main.extract_specific_relations') + @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') def test_handle_post_request_raises_exception_if_parse_fail(self, mock_extract_specific_relations, mock_parse_data): mock_extract_specific_relations.return_value = [] mock_parse_data.side_effect = Exception() @@ -28,9 +28,9 @@ def test_handle_post_request_raises_exception_if_parse_fail(self, mock_extract_s mock_parse_data.assert_called_once() - @mock.patch('relation_extraction.NaiveMVP.main.send_to_database_component') + @mock.patch('relation_extraction.knowledge_graph_messenger.KnowledgeGraphMessenger.send_request') @mock.patch('relation_extraction.NaiveMVP.main.parse_data') - @mock.patch('relation_extraction.NaiveMVP.main.extract_specific_relations') + @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') def test_handle_post_request_raises_exception_if_db_component_fail(self, mock_extract_specific_relations, mock_parse_data, mock_send_to_db): mock_extract_specific_relations.return_value = [] mock_parse_data.return_value = dict()