Skip to content

Commit

Permalink
Address J-M commit
Browse files: browse the repository at this point in the history
  • Loading branch information
khaledk2 committed Jul 18, 2024
1 parent 6c8ca41 commit 8c92eeb
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 35 deletions.
1 change: 1 addition & 0 deletions configurations/app_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ MAX_RETUNED_ITEMS : 1700000
ELASTICSEARCH_BACKUP_FOLDER: "path/to/elasticsearch/backup/folder"
verify_certs: False
ELASTIC_PASSWORD: elasticsearch_user_password
BASE_FOLDER: /etc/searchengine/
14 changes: 9 additions & 5 deletions manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,11 @@ def restore_postgresql_database():
help="resource name, creating all the indexes for all the resources is the default", # noqa
)
@manager.option(
"-n",
"--nobackup",
"-b",
"--backup",
help="if True, backup will be called ", # noqa
)
def get_index_data_from_database(resource="all", nobackup=None):
def get_index_data_from_database(resource="all", backup="True"):
"""
insert data in Elasticsearch index for each resource
It gets the data from postgres database server
Expand All @@ -144,7 +144,8 @@ def get_index_data_from_database(resource="all", nobackup=None):
get_insert_data_to_index,
save_key_value_buckets,
)

import json
backup=json.loads(backup.lower())
if resource != "all":
sql_st = sqls_resources.get(resource)
if not sql_st:
Expand All @@ -160,7 +161,7 @@ def get_index_data_from_database(resource="all", nobackup=None):
test_indexing_search_query(deep_check=False, check_studies=True)

# backup the index data
if not nobackup:
if backup:
backup_elasticsearch_data()


Expand Down Expand Up @@ -412,4 +413,7 @@ def test_container_key_value():


if __name__ == "__main__":
from flask_script import Command

Command.capture_all_args = False
manager.run()
18 changes: 9 additions & 9 deletions omero_search_engine/validation/omero_keyvalue_data_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ def check_for_trailing_space(screen_name, project_name):
)
tail_space_results = conn.execute_query(sql_statment)
if len(tail_space_results) == 0:
search_omero_app.logger.info("No results is availlable for trailing space")
search_omero_app.logger.info("No results is available for trailing space")
return
search_omero_app.logger.info("Generate for trailing space ...")
genrate_reports(tail_space_results, "tailing_space", screen_name, project_name)
generate_reports(tail_space_results, "trailing_space", screen_name, project_name)


def check_for_heading_space(screen_name, project_name):
Expand All @@ -72,35 +72,35 @@ def check_for_heading_space(screen_name, project_name):
search_omero_app.logger.info("No results available for heading space")
return
search_omero_app.logger.info("Generate for head space ...")
genrate_reports(head_space_results, "heading_space", screen_name, project_name)
generate_reports(head_space_results, "heading_space", screen_name, project_name)


def check_duplicated_keyvalue_pairs(screen_name, project_name):
search_omero_app.logger.info("Checking for duplicated key-value pairs ...")
search_omero_app.logger.info("Checking for duplicated key-value pairs...")
sql_statment = prepare_the_sql_statement(
duplicated_keyvalue_pairs_query, screen_name, project_name, "where"
)
duplicated_keyvalue_pairs_results = conn.execute_query(sql_statment)
if len(duplicated_keyvalue_pairs_results) == 0:
search_omero_app.logger.info(
"No results available for duplicated key-value pairs "
"No results available for duplicated key-value pairs"
)
return
search_omero_app.logger.info("Generate reports for duplicated key-value pairs ...")
genrate_reports(
search_omero_app.logger.info("Generate reports for duplicated key-value pairs...")
generate_reports(
duplicated_keyvalue_pairs_results,
"duplicated_keyvalue_pairs",
screen_name,
project_name,
)


def genrate_reports(results, check_type, screen_name, project_name):
def generate_reports(results, check_type, screen_name, project_name):
"""
Generate the output CSV files contents and save them
"""
df = pd.DataFrame(results)
base_folder = "/etc/searchengine/"
base_folder = search_omero_app.config.get("BASE_FOLDER")
if not os.path.isdir(base_folder):
base_folder = os.path.expanduser("~")

Expand Down
34 changes: 17 additions & 17 deletions omero_search_engine/validation/results_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
"not_in_clause": query_image_in,
"screens_count": screens_count,
"projects_count": projects_count,
"aviable_values_for_key": query_images_available_values_for_key,
"available_values_for_key": query_images_available_values_for_key,
"search_any_value": query_images_any_value,
"image_contains_not_contains": query_images_contains_not_contains,
}
Expand Down Expand Up @@ -192,7 +192,7 @@ def get_results_db(self, operator=None):
search_omero_app.logger.info("Getting results from postgres")
if self.type == "buckets":
if self.name:
sql = query_methods["aviable_values_for_key"].substitute(name=self.name)
sql = query_methods["available_values_for_key"].substitute(name=self.name)
conn = search_omero_app.config["database_connector"]
self.postgres_results = conn.execute_query(sql)
elif self.value:
Expand Down Expand Up @@ -578,7 +578,7 @@ def compare_results(self, operator=None):
else:
searchengine_no = self.searchengine_results
return (
"not equal, number of the results from the database server is: %s and"
"not equal, the number of results from the database server is: %s and"
"the number of results from searchengine is %s?,"
"\ndatabase server query time= %s, searchengine query time= %s"
% (len(self.postgres_results), searchengine_no, sql_time, searchengine_time)
Expand Down Expand Up @@ -666,7 +666,7 @@ def validate_queries(json_file, deep_check):
validator_in.set_in_query(case, resource)
res = validator_in.compare_results()
messages.append(
"Results for 'in' form the database and search engine"
"Results for 'in' from the database and search engine"
"for %s name: %s and value in [%s] are %s"
% (
validator_in.resource,
Expand All @@ -683,7 +683,7 @@ def validate_queries(json_file, deep_check):
validator_not_in.set_in_query(case, resource, type="not_in_clause")
res = validator_not_in.compare_results()
messages.append(
"Results for 'not in' form the database and search engine for %s name: "
"Results for 'not in' from the database and search engine for %s name: "
"%s and value in [%s] are %s"
% (
validator_not_in.resource,
Expand All @@ -706,7 +706,7 @@ def validate_queries(json_file, deep_check):
"###########################################################################################################" # noqa
)
# save the check report to a text file
base_folder = "/etc/searchengine/"
base_folder = search_omero_app.config.get("BASE_FOLDER")
if not os.path.isdir(base_folder):
base_folder = os.path.expanduser("~")

Expand Down Expand Up @@ -752,7 +752,7 @@ def test_no_images():
names[name] = int(study[9])

results = {}
base_folder = "/etc/searchengine/"
base_folder = search_omero_app.config.get("BASE_FOLDER")
if not os.path.isdir(base_folder):
base_folder = os.path.expanduser("~")

Expand Down Expand Up @@ -815,7 +815,7 @@ def test_no_images():

def get_omero_stats():
values = ["Attribute", "No. buckets", "Total number", "Resource"]
base_folder = "/etc/searchengine/"
base_folder = search_omero_app.config.get("BASE_FOLDER")
if not os.path.isdir(base_folder):
base_folder = os.path.expanduser("~")
stats_file = os.path.join(base_folder, "stats.csv")
Expand Down Expand Up @@ -869,8 +869,8 @@ def check_number_images_sql_containers_using_ids():
This method tests the number of images inside each container
(project or screen) in the searchengine index data
and compare them with the number of images inside
each container in the database server
As container name is not unique,container id is used
each container in the database server.
As container name is not unique, container id is used
to determine the number of images
"""
from omero_search_engine.api.v1.resources.urls import (
Expand Down Expand Up @@ -913,11 +913,11 @@ def check_number_images_sql_containers_using_ids():
returned_results = search_resource_annotation("image", query_data)
if returned_results.get("results"):
if returned_results.get("results").get("size"):
seachengine_results = returned_results["results"]["size"]
searchengine_results = returned_results["results"]["size"]
else:
seachengine_results = 0
searchengine_results = 0
search_omero_app.logger.info(
"No of images returned from searchengine: %s" % seachengine_results
"Number of images returned from searchengine: %s" % searchengine_results
)
if resource == "project":
sql = query_images_in_project_id.substitute(project_id=res_id)
Expand All @@ -926,9 +926,9 @@ def check_number_images_sql_containers_using_ids():
results = conn.execute_query(sql)
postgres_results = len(results)
search_omero_app.logger.info(
"No of images returned from postgresql: %s" % postgres_results
"Number of images returned from the database: %s" % postgres_results
)
if seachengine_results != postgres_results:
if searchengine_results != postgres_results:
if res_name == "idr0021" and res_id == 872:
# """
# issue with these two images:
Expand All @@ -937,7 +937,7 @@ def check_number_images_sql_containers_using_ids():
# """
continue
dd = False
if seachengine_results > 0:
if searchengine_results > 0:
test_array = []
for res in returned_results["results"]["results"]:
test_array.append(res.get("id"))
Expand Down Expand Up @@ -1030,7 +1030,7 @@ def get_no_images_sql_containers(write_report=True):
"\n-----------------------------------------------------------------------------\n" # noqa
)
if write_report:
base_folder = "/etc/searchengine/"
base_folder = search_omero_app.config.get("BASE_FOLDER")
if not os.path.isdir(base_folder):
base_folder = os.path.expanduser("~")
report_file = os.path.join(base_folder, "check_containers_report.txt")
Expand Down
6 changes: 3 additions & 3 deletions unit_tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
query_in,
images_keys,
images_value_parts,
contains_not_contains_quries,
contains_not_contains_queries,
image_owner,
image_group,
image_owner_group,
Expand Down Expand Up @@ -274,8 +274,8 @@ def test_available_values_for_key(self):
validator.searchengine_results.get("total_number_of_buckets"),
)

def test_contains_not_contains_quries(self):
for resource, cases in contains_not_contains_quries.items():
def test_contains_not_contains_queries(self):
for resource, cases in contains_not_contains_queries.items():
for case in cases:
name = case[0]
value = case[1]
Expand Down
2 changes: 1 addition & 1 deletion unit_tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
]
}

contains_not_contains_quries = {"image": [["cell line", "hel"], ["gene symbol", "cep"]]}
contains_not_contains_queries = {"image": [["cell line", "hel"], ["gene symbol", "cep"]]}

query_in = {
"image": [
Expand Down

0 comments on commit 8c92eeb

Please sign in to comment.