-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_load_IndexHandler.py
139 lines (104 loc) · 4.41 KB
/
test_load_IndexHandler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import sys
from typing import Iterator, List, Tuple
from unittest.mock import Mock

import pandas as pd
import pytest
from pandas import Timestamp

# Make the project root importable when the tests are launched from it.
sys.path.append(".")
from load.core.dbHandler.IndexHandler import IndexHandler
from load.core.GraphHandler import GraphHandler
class TestIndexHandler:
    """
    Integration tests for IndexHandler.

    The tests talk to a real Elasticsearch service, configured in the
    ``elasticsearch_handler`` fixture as host ``elastic`` on port 9200, and
    index documents built from the example JSON file loaded in
    ``setup_class``.
    """

    # Name of the throw-away index every test writes to and queries.
    INDEX_NAME = "test_hf_models"

    @classmethod
    def setup_class(cls):
        """
        Load the shared example data once for the whole test class.

        Reads the example JSON file into a DataFrame and builds a GraphHandler
        whose SQL/RDF/index back-ends are mocks; the tests only use it to turn
        a model name into a URI via ``text_to_uri_term``.
        """
        cls.m4ml_example_dataframe = pd.read_json(
            "./tests/Test_files/load_files/hf_transformed_fair4ml_example.json"
        )
        cls.graph_handler = GraphHandler(
            SQLHandler=Mock(),
            RDFHandler=Mock(),
            IndexHandler=Mock(),
            kg_files_directory="./tests/Test_files/load_files/kg_files",
        )

    @pytest.fixture
    def elasticsearch_handler(self) -> Iterator[IndexHandler]:
        """
        Yield an IndexHandler with the test index initialised.

        After the test, the handler's indices are cleaned and its ``es``
        client is closed.
        """
        handler = IndexHandler(
            es_host="elastic",
            es_port=9200,
        )
        handler.initialize_HF_index(index_name=self.INDEX_NAME)
        yield handler
        # Close the client even when index clean-up fails, so a broken
        # teardown does not leak the connection into later tests.
        try:
            handler.clean_indices()
        finally:
            handler.es.close()

    def _build_entity(self, handler: IndexHandler, row: pd.Series):
        """Build the index entity for one example row, keyed by its model URI."""
        model_uri = self.graph_handler.text_to_uri_term(
            row["schema.org:name"][0]["data"]
        )
        return handler.create_hf_index_entity(row, model_uri)

    def _count_hits(self, handler: IndexHandler, query: dict) -> int:
        """Refresh the test index and return the total hit count for ``query``."""
        # Refresh first so documents written just before are searchable.
        handler.es.indices.refresh(index=self.INDEX_NAME)
        response = handler.es.search(index=self.INDEX_NAME, body={"query": query})
        return response["hits"]["total"]["value"]

    def test_index_one_model(self, elasticsearch_handler):
        """
        Saving a single entity makes exactly one document searchable.
        """
        first_row = self.m4ml_example_dataframe.iloc[0]
        entity = self._build_entity(elasticsearch_handler, first_row)

        entity.save(using=elasticsearch_handler.es, index=self.INDEX_NAME)

        assert self._count_hits(elasticsearch_handler, {"match_all": {}}) == 1

    def test_index_one_model_and_update(self, elasticsearch_handler):
        """
        Updating a saved document's name makes it searchable by the new name.
        """
        first_row = self.m4ml_example_dataframe.iloc[0]
        entity = self._build_entity(elasticsearch_handler, first_row)

        entity.save(using=elasticsearch_handler.es, index=self.INDEX_NAME)

        # The document must be present before it is updated.
        assert self._count_hits(elasticsearch_handler, {"match_all": {}}) == 1

        entity.name = "updated_name"
        elasticsearch_handler.update_document(
            index_name=self.INDEX_NAME,
            document_id=entity.meta.id,
            document=entity.to_dict(),
        )

        updated_query = {"match": {"name": "updated_name"}}
        assert self._count_hits(elasticsearch_handler, updated_query) == 1

    def test_index_multiple_models(self, elasticsearch_handler):
        """
        Bulk-adding one entity per example row indexes every row.
        """
        entities = [
            self._build_entity(elasticsearch_handler, row)
            for _, row in self.m4ml_example_dataframe.iterrows()
        ]

        elasticsearch_handler.add_documents(documents=entities)

        total_hits = self._count_hits(elasticsearch_handler, {"match_all": {}})
        assert total_hits == len(self.m4ml_example_dataframe)