From 8c92eeb7fd9b1c5159858e9c70a681bd1ff5b958 Mon Sep 17 00:00:00 2001
From: khaledk2
Date: Fri, 19 Jul 2024 00:18:17 +0100
Subject: [PATCH] Address J-M commit

---
 configurations/app_config.yml            |  1 +
 manage.py                                | 14 +++++---
 .../omero_keyvalue_data_validator.py     | 18 +++++-----
 .../validation/results_validator.py      | 34 +++++++++----------
 unit_tests/test_app.py                   |  6 ++--
 unit_tests/test_data.py                  |  2 +-
 6 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/configurations/app_config.yml b/configurations/app_config.yml
index 4c1a2e21..ed4cead4 100644
--- a/configurations/app_config.yml
+++ b/configurations/app_config.yml
@@ -14,3 +14,4 @@ MAX_RETUNED_ITEMS : 1700000
 ELASTICSEARCH_BACKUP_FOLDER: "path/to/elasticsearch/backup/folder"
 verify_certs: False
 ELASTIC_PASSWORD: elasticsearch_user_password
+BASE_FOLDER: /etc/searchengine/
diff --git a/manage.py b/manage.py
index 20ea6964..a8584700 100644
--- a/manage.py
+++ b/manage.py
@@ -128,11 +128,11 @@ def restore_postgresql_database():
     help="resource name, creating all the indexes for all the resources is the default",  # noqa
 )
 @manager.option(
-    "-n",
-    "--nobackup",
+    "-b",
+    "--backup",
     help="if True, backup will be called ",  # noqa
 )
-def get_index_data_from_database(resource="all", nobackup=None):
+def get_index_data_from_database(resource="all", backup="True"):
     """
     insert data in Elasticsearch index for each resource
     It gets the data from postgres database server
@@ -144,7 +144,8 @@ def get_index_data_from_database(resource="all", nobackup=None):
         get_insert_data_to_index,
         save_key_value_buckets,
     )
-
+    import json
+    backup = json.loads(backup.lower())
     if resource != "all":
         sql_st = sqls_resources.get(resource)
         if not sql_st:
@@ -160,7 +161,7 @@ def get_index_data_from_database(resource="all", nobackup=None):
         test_indexing_search_query(deep_check=False, check_studies=True)
 
     # backup the index data
-    if not nobackup:
+    if backup:
         backup_elasticsearch_data()
 
 
@@ -412,4 +413,7 @@ def test_container_key_value():
 
 
 if __name__ == "__main__":
+    from flask_script import Command
+
+    Command.capture_all_args = False
     manager.run()
diff --git a/omero_search_engine/validation/omero_keyvalue_data_validator.py b/omero_search_engine/validation/omero_keyvalue_data_validator.py
index 905fa812..d9d203d2 100644
--- a/omero_search_engine/validation/omero_keyvalue_data_validator.py
+++ b/omero_search_engine/validation/omero_keyvalue_data_validator.py
@@ -56,10 +56,10 @@ def check_for_trailing_space(screen_name, project_name):
     )
     tail_space_results = conn.execute_query(sql_statment)
     if len(tail_space_results) == 0:
-        search_omero_app.logger.info("No results is availlable for trailing space")
+        search_omero_app.logger.info("No results is available for trailing space")
         return
     search_omero_app.logger.info("Generate for trailing space ...")
-    genrate_reports(tail_space_results, "tailing_space", screen_name, project_name)
+    generate_reports(tail_space_results, "trailing_space", screen_name, project_name)
 
 
 def check_for_heading_space(screen_name, project_name):
@@ -72,22 +72,22 @@ def check_for_heading_space(screen_name, project_name):
         search_omero_app.logger.info("No results available for heading space")
         return
     search_omero_app.logger.info("Generate for head space ...")
-    genrate_reports(head_space_results, "heading_space", screen_name, project_name)
+    generate_reports(head_space_results, "heading_space", screen_name, project_name)
 
 
 def check_duplicated_keyvalue_pairs(screen_name, project_name):
-    search_omero_app.logger.info("Checking for duplicated key-value pairs ...")
+    search_omero_app.logger.info("Checking for duplicated key-value pairs...")
     sql_statment = prepare_the_sql_statement(
         duplicated_keyvalue_pairs_query, screen_name, project_name, "where"
     )
     duplicated_keyvalue_pairs_results = conn.execute_query(sql_statment)
     if len(duplicated_keyvalue_pairs_results) == 0:
         search_omero_app.logger.info(
-            "No results available for duplicated key-value pairs "
+            "No results available for duplicated key-value pairs"
         )
         return
-    search_omero_app.logger.info("Generate reports for duplicated key-value pairs ...")
-    genrate_reports(
+    search_omero_app.logger.info("Generate reports for duplicated key-value pairs...")
+    generate_reports(
         duplicated_keyvalue_pairs_results,
         "duplicated_keyvalue_pairs",
         screen_name,
@@ -95,12 +95,12 @@ def check_duplicated_keyvalue_pairs(screen_name, project_name):
     )
 
 
-def genrate_reports(results, check_type, screen_name, project_name):
+def generate_reports(results, check_type, screen_name, project_name):
     """
     Generate the output CSV files contents and save them
     """
     df = pd.DataFrame(results)
-    base_folder = "/etc/searchengine/"
+    base_folder = search_omero_app.config.get("BASE_FOLDER")
     if not os.path.isdir(base_folder):
         base_folder = os.path.expanduser("~")
 
diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py
index daa6e588..c9d87382 100644
--- a/omero_search_engine/validation/results_validator.py
+++ b/omero_search_engine/validation/results_validator.py
@@ -59,7 +59,7 @@
     "not_in_clause": query_image_in,
     "screens_count": screens_count,
     "projects_count": projects_count,
-    "aviable_values_for_key": query_images_available_values_for_key,
+    "available_values_for_key": query_images_available_values_for_key,
     "search_any_value": query_images_any_value,
     "image_contains_not_contains": query_images_contains_not_contains,
 }
@@ -192,7 +192,7 @@ def get_results_db(self, operator=None):
         search_omero_app.logger.info("Getting results from postgres")
         if self.type == "buckets":
             if self.name:
-                sql = query_methods["aviable_values_for_key"].substitute(name=self.name)
+                sql = query_methods["available_values_for_key"].substitute(name=self.name)
                 conn = search_omero_app.config["database_connector"]
                 self.postgres_results = conn.execute_query(sql)
             elif self.value:
@@ -578,7 +578,7 @@ def compare_results(self, operator=None):
         else:
             searchengine_no = self.searchengine_results
         return (
-            "not equal, number of the results from the database server is: %s and"
+            "not equal, the number of results from the database server is: %s and"
             "the number of results from searchengine is %s?,"
             "\ndatabase server query time= %s, searchengine query time= %s"
             % (len(self.postgres_results), searchengine_no, sql_time, searchengine_time)
@@ -666,7 +666,7 @@ def validate_queries(json_file, deep_check):
                 validator_in.set_in_query(case, resource)
                 res = validator_in.compare_results()
                 messages.append(
-                    "Results for 'in' form the database and search engine"
+                    "Results for 'in' from the database and search engine"
                     "for %s name: %s and value in [%s] are %s"
                     % (
                         validator_in.resource,
@@ -683,7 +683,7 @@ def validate_queries(json_file, deep_check):
                 validator_not_in.set_in_query(case, resource, type="not_in_clause")
                 res = validator_not_in.compare_results()
                 messages.append(
-                    "Results for 'not in' form the database and search engine for %s name: "
+                    "Results for 'not in' from the database and search engine for %s name: "
                     "%s and value in [%s] are %s"
                     % (
                         validator_not_in.resource,
@@ -706,7 +706,7 @@ def validate_queries(json_file, deep_check):
         "###########################################################################################################"  # noqa
     )
     # save the check report to a text file
-    base_folder = "/etc/searchengine/"
+    base_folder = search_omero_app.config.get("BASE_FOLDER")
     if not os.path.isdir(base_folder):
         base_folder = os.path.expanduser("~")
 
@@ -752,7 +752,7 @@ def test_no_images():
             names[name] = int(study[9])
 
     results = {}
-    base_folder = "/etc/searchengine/"
+    base_folder = search_omero_app.config.get("BASE_FOLDER")
     if not os.path.isdir(base_folder):
         base_folder = os.path.expanduser("~")
 
@@ -815,7 +815,7 @@
 
 def get_omero_stats():
     values = ["Attribute", "No. buckets", "Total number", "Resource"]
-    base_folder = "/etc/searchengine/"
+    base_folder = search_omero_app.config.get("BASE_FOLDER")
     if not os.path.isdir(base_folder):
         base_folder = os.path.expanduser("~")
     stats_file = os.path.join(base_folder, "stats.csv")
@@ -869,8 +869,8 @@ def check_number_images_sql_containers_using_ids():
     This method tests the number of images inside each container
     (project or screen) in the searchengine index data
     and compare them with the number of images inside
-    each container in the database server
-    As container name is not unique,container id is used
+    each container in the database server.
+    As container name is not unique, container id is used
     to determine the number of images
     """
     from omero_search_engine.api.v1.resources.urls import (
@@ -913,11 +913,11 @@ def check_number_images_sql_containers_using_ids():
             returned_results = search_resource_annotation("image", query_data)
             if returned_results.get("results"):
                 if returned_results.get("results").get("size"):
-                    seachengine_results = returned_results["results"]["size"]
+                    searchengine_results = returned_results["results"]["size"]
             else:
-                seachengine_results = 0
+                searchengine_results = 0
             search_omero_app.logger.info(
-                "No of images returned from searchengine: %s" % seachengine_results
+                "Number of images returned from searchengine: %s" % searchengine_results
             )
             if resource == "project":
                 sql = query_images_in_project_id.substitute(project_id=res_id)
@@ -926,9 +926,9 @@ def check_number_images_sql_containers_using_ids():
             results = conn.execute_query(sql)
             postgres_results = len(results)
             search_omero_app.logger.info(
-                "No of images returned from postgresql: %s" % postgres_results
+                "Number of images returned from the database: %s" % postgres_results
             )
-            if seachengine_results != postgres_results:
+            if searchengine_results != postgres_results:
                 if res_name == "idr0021" and res_id == 872:
                     # """
                     # issue with these two images:
                     #
                     # """
                     continue
                 dd = False
-                if seachengine_results > 0:
+                if searchengine_results > 0:
                     test_array = []
                     for res in returned_results["results"]["results"]:
                         test_array.append(res.get("id"))
@@ -1030,7 +1030,7 @@ def get_no_images_sql_containers(write_report=True):
             "\n-----------------------------------------------------------------------------\n"  # noqa
         )
     if write_report:
-        base_folder = "/etc/searchengine/"
+        base_folder = search_omero_app.config.get("BASE_FOLDER")
         if not os.path.isdir(base_folder):
             base_folder = os.path.expanduser("~")
         report_file = os.path.join(base_folder, "check_containers_report.txt")
diff --git a/unit_tests/test_app.py b/unit_tests/test_app.py
index 235bad36..5566739f 100644
--- a/unit_tests/test_app.py
+++ b/unit_tests/test_app.py
@@ -57,7 +57,7 @@
     query_in,
     images_keys,
     images_value_parts,
-    contains_not_contains_quries,
+    contains_not_contains_queries,
     image_owner,
     image_group,
     image_owner_group,
@@ -274,8 +274,8 @@ def test_available_values_for_key(self):
                 validator.searchengine_results.get("total_number_of_buckets"),
             )
 
-    def test_contains_not_contains_quries(self):
-        for resource, cases in contains_not_contains_quries.items():
+    def test_contains_not_contains_queries(self):
+        for resource, cases in contains_not_contains_queries.items():
             for case in cases:
                 name = case[0]
                 value = case[1]
diff --git a/unit_tests/test_data.py b/unit_tests/test_data.py
index f19c789f..6ccf7292 100644
--- a/unit_tests/test_data.py
+++ b/unit_tests/test_data.py
@@ -83,7 +83,7 @@
     ]
 }
 
-contains_not_contains_quries = {"image": [["cell line", "hel"], ["gene symbol", "cep"]]}
+contains_not_contains_queries = {"image": [["cell line", "hel"], ["gene symbol", "cep"]]}
 
 query_in = {
     "image": [
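For readers unfamiliar with the idiom, the new --backup option added to manage.py above is parsed with json.loads(backup.lower()). The short sketch below illustrates that behaviour in isolation; it is not part of the patch, and the parse_backup_flag helper is a hypothetical name used only for illustration.

    # Minimal sketch (assumed standalone script) of the string-to-boolean
    # parsing used by the patched get_index_data_from_database command.
    import json


    def parse_backup_flag(backup="True"):
        # json.loads("true") -> True, json.loads("false") -> False;
        # any other non-JSON string (e.g. "yes") raises json.JSONDecodeError.
        return json.loads(backup.lower())


    if __name__ == "__main__":
        assert parse_backup_flag("True") is True
        assert parse_backup_flag("false") is False

With the default value of "True", the Elasticsearch backup runs after indexing; passing --backup False makes the `if backup:` guard in get_index_data_from_database skip it.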