From 1391c928979d135f1a5b75ca37cc4edc524854d0 Mon Sep 17 00:00:00 2001
From: khaledk2
Date: Fri, 27 Sep 2024 23:19:47 +0100
Subject: [PATCH] fix per commit issues

---
 configurations/configuration.py               |  1 +
 manage.py                                     | 17 +++--
 .../api/v1/resources/query_handler.py         | 31 +++++---
 .../api/v1/resources/resource_analyser.py     | 70 +++++++++++-------
 omero_search_engine/api/v1/resources/urls.py  | 54 ++++++++++----
 omero_search_engine/api/v1/resources/utils.py | 63 +++++++++++-----
 .../elasticsearch/transform_data.py           | 14 ++--
 omero_search_engine/database/utils.py         | 24 +++---
 .../validation/results_validator.py           | 74 ++++++++++---------
 unit_tests/test_app.py                        | 23 +++---
 10 files changed, 231 insertions(+), 140 deletions(-)

diff --git a/configurations/configuration.py b/configurations/configuration.py
index 66fccb6..d84edf1 100644
--- a/configurations/configuration.py
+++ b/configurations/configuration.py
@@ -104,6 +104,7 @@ def update_config_file(updated_configuration, configure_database=False):
         with open(app_config.INSTANCE_CONFIG, "w") as f:
             yaml.dump(configuration, f)
 
+
 def config_database(configuration, updated_configuration):
     for data_source in configuration.get("DATA_SOURCES"):
         changed = False
diff --git a/manage.py b/manage.py
index be3a78d..14aa37b 100644
--- a/manage.py
+++ b/manage.py
@@ -123,6 +123,7 @@ def restore_postgresql_database(source="all"):
 
     restore_database(source)
 
+
 @manager.command
 @manager.option(
     "-r",
@@ -181,7 +182,10 @@ def get_index_data_from_database(resource="all", source="all", backup="True"):
             clean_index=clean_index,
             only_values=False,
         )
-        print("!Done for data_source: %s from %s" % (data_source, search_omero_app.config.database_connectors.keys()))
+        print(
+            "!Done for data_source: %s from %s"
+            % (data_source, search_omero_app.config.database_connectors.keys())
+        )
 
     if clean_index:
         clean_index = False
@@ -190,9 +194,9 @@ def get_index_data_from_database(resource="all", source="all", backup="True"):
             source=data_source, deep_check=False, check_studies=True
         )
 
-    #backup the index data
+    # backup the index data
     if backup:
-       backup_elasticsearch_data()
+        backup_elasticsearch_data()
 
 
 # set configurations
@@ -338,7 +342,6 @@ def set_no_processes(no_processes=None):
     "--data_source",
     help="data source name, the default is all",  # noqa
 )
-
 @manager.option(
     "-r",
     "--resource",
@@ -350,7 +353,9 @@ def set_no_processes(no_processes=None):
     help="creating the elastic search index if set to True",  # noqa
 )
 @manager.option("-o", "--only_values", help="creating cached values only ")
-def cache_key_value_index(resource=None, data_source='all',create_index=None, only_values=None):
+def cache_key_value_index(
+    resource=None, data_source="all", create_index=None, only_values=None
+):
     """
     Cache the value bucket for each value for each resource
     """
@@ -358,7 +363,7 @@ def cache_key_value_index(resource=None, data_source='all',create_index=None, on
         save_key_value_buckets,
     )
 
-    save_key_value_buckets(resource,data_source ,create_index, only_values)
+    save_key_value_buckets(resource, data_source, create_index, only_values)
 
 
 @manager.command
diff --git a/omero_search_engine/api/v1/resources/query_handler.py b/omero_search_engine/api/v1/resources/query_handler.py
index 225f6c0..03d4c7e 100644
--- a/omero_search_engine/api/v1/resources/query_handler.py
+++ b/omero_search_engine/api/v1/resources/query_handler.py
@@ -192,7 +192,7 @@ def __init__(
         columns_def,
         return_columns,
         return_containers,
-        data_source
+        data_source,
     ):
         self.or_query_group = or_query_group
         self.and_query_group = and_query_group
@@ -205,7 +205,7 @@ def __init__(
         self.additional_image_conds = []
         self.return_columns = return_columns
         self.return_containers = return_containers
-        self.data_source=data_source
+        self.data_source = data_source
 
     def get_image_non_image_query(self):
         res = None
@@ -425,7 +425,7 @@ def run_query(self, query_, resource):
                 self.raw_elasticsearch_query,
                 main_attributes,
                 return_containers=self.return_containers,
-                data_source=self.data_source
+                data_source=self.data_source,
             )
         else:
             res = search_query(
@@ -435,7 +435,7 @@ def run_query(self, query_, resource):
                 pagination_dict,
                 self.raw_elasticsearch_query,
                 main_attributes,
-                data_source=self.data_source
+                data_source=self.data_source,
             )
 
         if resource != "image":
@@ -454,7 +454,7 @@ def search_query(
     raw_elasticsearch_query,
     main_attributes=None,
     return_containers=False,
-    data_source=None
+    data_source=None,
 ):
     search_omero_app.logger.info(
         "-------------------------------------------------"
@@ -484,16 +484,19 @@ def search_query(
             bookmark=bookmark,
             pagination_dict=pagination_dict,
             return_containers=return_containers,
-            data_source=data_source
+            data_source=data_source,
         )
     else:
         # Should have a method to search the elasticsearch and
         # returns the containers only,
         # It is hard coded in the util search_annotation method.
         ress = search_resource_annotation(
-            resource, q_data.get("query"), return_containers=return_containers, data_source=data_source
+            resource,
+            q_data.get("query"),
+            return_containers=return_containers,
+            data_source=data_source,
         )
-        if type (ress) is str:
+        if type(ress) is str:
             return ress
         ress["Error"] = "none"
@@ -644,7 +647,9 @@ def process_search_results(results, resource, columns_def):
     return returned_results
 
 
-def determine_search_results_(query_,data_source="all", return_columns=False, return_containers=False):
+def determine_search_results_(
+    query_, data_source="all", return_columns=False, return_containers=False
+):
     from omero_search_engine.api.v1.resources.utils import build_error_message
 
     if query_.get("query_details"):
@@ -750,7 +755,7 @@
             columns_def,
             return_columns,
             return_containers,
-            data_source
+            data_source,
         )
         query_results = query_runner.get_image_non_image_query()
         return query_results
@@ -787,7 +792,7 @@ def simple_search(
             {"query_details": query_details},
             bookmark=bookmark,
             return_containers=return_containers,
-            data_source=data_source
+            data_source=data_source,
         )
     else:
         and_filters.append(
@@ -798,7 +803,9 @@
                 "resource": "project",
             }
         )
-        return determine_search_results_({"query_details": query_details},data_source=data_source)
+        return determine_search_results_(
+            {"query_details": query_details}, data_source=data_source
+        )
 
 
 def add_local_schemas_to(resolver, schema_folder, base_uri, schema_ext=".json"):
diff --git a/omero_search_engine/api/v1/resources/resource_analyser.py b/omero_search_engine/api/v1/resources/resource_analyser.py
index 3a681d0..195e2aa 100644
--- a/omero_search_engine/api/v1/resources/resource_analyser.py
+++ b/omero_search_engine/api/v1/resources/resource_analyser.py
@@ -264,7 +264,7 @@ def get_all_values_for_a_key(table_, data_source, key):
     try:
         res = search_index_for_value(res_index, query)
     except Exception as ex:
-        print("Query: %s Error: %s"%(query,str(ex)))
+        print("Query: %s Error: %s" % (query, str(ex)))
         raise ex
     number_of_buckets = (
         res.get("aggregations")
@@ -337,6 +337,7 @@ def get_values_for_a_key(table_, key, data_source):
     start_time = time.time()
     res = search_index_for_value(res_index, query)
     query_time = "%.2f" % (time.time() - start_time)
+    print("Query time: %s" % query_time)
     returned_results = []
     if res.get("aggregations"):
         for bucket in (
@@ -382,7 +383,7 @@ def prepare_search_results(results, size=0):
             continue
         row = {}
         returned_results.append(row)
-        row["Data Source"]=res["data_source"]
+        row["Data Source"] = res["data_source"]
         row["Key"] = res["Attribute"]
         row["Value"] = res["Value"]
         row["Number of %ss" % resource] = res.get("items_in_the_bucket")
@@ -491,13 +492,16 @@ def query_cashed_bucket_part_value_keys(
         name = name.strip()
     value = adjust_value(value)
 
     if data_source and data_source.strip() and data_source.lower() != "all":
-        data_source = [itm.strip().lower() for itm in data_source.split(',')]
+        data_source = [itm.strip().lower() for itm in data_source.split(",")]
     else:
         data_source = get_data_sources()
     if resource != "all":
         query = key_part_values_buckets_template.substitute(
-            name=name, value=value, resource=resource, data_source=json.dumps(data_source)
+            name=name,
+            value=value,
+            resource=resource,
+            data_source=json.dumps(data_source),
         )
         res = search_index_for_values_get_all_buckets(es_index, query)
         returned_results = prepare_search_results_buckets(res)
@@ -510,7 +514,10 @@ def query_cashed_bucket_part_value_keys(
         if table == "image1":
             continue
         query = key_part_values_buckets_template.substitute(
-            name=name, value=value, resource=table, data_source=json.dumps(data_source)
+            name=name,
+            value=value,
+            resource=table,
+            data_source=json.dumps(data_source),
         )
         res = search_index_for_values_get_all_buckets(es_index, query)
         returned_results[table] = prepare_search_results_buckets(res)
@@ -522,9 +529,9 @@ def query_cashed_bucket(
 ):
     # returns possible matches for a specific resource
     if data_source and data_source.strip() and data_source.lower() != "all":
-        data_source = [itm.strip().lower() for itm in data_source.split(',')]
+        data_source = [itm.strip().lower() for itm in data_source.split(",")]
     else:
-        data_source =get_data_sources()
+        data_source = get_data_sources()
 
     if name:
         name = name.strip()
@@ -564,9 +571,9 @@ def search_value_for_resource(
     value = adjust_value(value)
 
     if data_source and data_source.lower() != "all":
-        data_source = [itm.strip().lower() for itm in data_source.split(',')]
+        data_source = [itm.strip().lower() for itm in data_source.split(",")]
     else:
-        data_source=get_data_sources()
+        data_source = get_data_sources()
 
     if table_ != "all":
         query = resource_key_values_buckets_template.substitute(
@@ -737,7 +744,7 @@ def search_value_for_resource(
 )
 
 key_values_buckets_template_with_data_source = Template(
-"""
+    """
 {"query":{"bool":{"must":[{"bool":{"must":{"match":{
 "resource.keyresource":"$resource"}}}},{"bool":
 {"must": {"match": {"data_source.keyvalue":$data_source}}}}]}}}
 """
 )
@@ -774,7 +781,9 @@ def get_restircted_search_terms():
     return restricted_search_terms
 
 
-def get_resource_attributes(resource, data_source=None, mode=None, es_index="key_values_resource_cach"):
+def get_resource_attributes(
+    resource, data_source=None, mode=None, es_index="key_values_resource_cach"
+):
     """
     return the available attributes for one or all resources
     """
@@ -785,34 +794,35 @@ def get_resource_attributes(resource, data_source=None, mode=None, es_index="key
             you may remove it to return all the keys."
         )
     returned_results = []
-    if data_source and data_source.lower() !="all":
-        data_source = [itm.strip().lower() for itm in data_source.split(',')]
-    all_data_sources=get_data_sources()
+    if data_source and data_source.lower() != "all":
+        data_source = [itm.strip().lower() for itm in data_source.split(",")]
+    all_data_sources = get_data_sources()
     for data_s in all_data_sources:
-        if data_source and data_source !="all" and data_s.lower() not in data_source:
+        if data_source and data_source != "all" and data_s.lower() not in data_source:
             continue
         returned_results_ = {}
         returned_results_["data_source"] = data_s
         returned_results.append(returned_results_)
         if resource != "all":
-            query = key_values_buckets_template_with_data_source.substitute(resource=resource, data_source=json.dumps(data_s))
-            #else:
+            query = key_values_buckets_template_with_data_source.substitute(
+                resource=resource, data_source=json.dumps(data_s)
+            )
+            # else:
             # query = key_values_buckets_template_2.substitute(resource=resource)
             res = connect_elasticsearch(
                 es_index, query
             )  # es.search(index=es_index, body=query)
-
             hits = res["hits"]["hits"]
             if len(hits) > 0:
                 returned_results_[resource] = hits[0]["_source"]["name"]
-
         else:
             for table in resource_elasticsearchindex:
-                query = key_values_buckets_template_with_data_source.substitute(resource=table,
-                                                                                data_source=json.dumps(data_s))
-                #else:
+                query = key_values_buckets_template_with_data_source.substitute(
+                    resource=table, data_source=json.dumps(data_s)
+                )
+                # else:
                 # query = key_values_buckets_template_2.substitute(resource=table)
                 res = connect_elasticsearch(
                     es_index, query
                 )
@@ -829,7 +839,7 @@ def get_resource_attributes(resource, data_source=None, mode=None, es_index="key
                 search_terms = list(set(restricted_search_terms[k]) & set(val))
                 if len(search_terms) > 0:
                     restircted_resources[k] = search_terms
-            returned_results.append( restircted_resources)
+            returned_results.append(restircted_resources)
 
     if "project" in returned_results:
         returned_results_["project"].append("name")
@@ -911,10 +921,14 @@ def get_resource_names(resource, name=None, description=False, data_source=None)
     return returned_results
 
 
-def get_the_results(resource, name, description, data_source, es_index="key_values_resource_cach"):
+def get_the_results(
+    resource, name, description, data_source, es_index="key_values_resource_cach"
+):
     returned_results = {}
     if data_source:
-        query = key_values_buckets_template_with_data_source.substitute(resource=resource, data_source=data_source)
+        query = key_values_buckets_template_with_data_source.substitute(
+            resource=resource, data_source=data_source
+        )
     else:
         query = key_values_buckets_template_2.substitute(resource=resource)
     results_ = connect_elasticsearch(
@@ -959,11 +973,13 @@ def get_the_results(resource, name, description, data_source, es_index="key_valu
     return returned_results
 
 
-def get_container_values_for_key(table_, container_name, csv, ret_data_source=None, key=None):
+def get_container_values_for_key(
+    table_, container_name, csv, ret_data_source=None, key=None
+):
     returned_results = []
     pr_names = get_resource_names("all")
     if ret_data_source:
-        ret_data_source = [itm.strip().lower() for itm in ret_data_source.split(',')]
+        ret_data_source = [itm.strip().lower() for itm in ret_data_source.split(",")]
     for resourse, names_ in pr_names.items():
         for data_source, names in names_.items():
             if ret_data_source:
diff --git a/omero_search_engine/api/v1/resources/urls.py b/omero_search_engine/api/v1/resources/urls.py
index 2219b8a..3423b32 100644
--- a/omero_search_engine/api/v1/resources/urls.py
+++ b/omero_search_engine/api/v1/resources/urls.py
@@ -93,7 +93,7 @@ def search_resource_page(resource_table):
     return_containers = data.get("return_containers")
     data_source = request.args.get("data_source")
     if data_source:
-        data_source=data_source.strip()
+        data_source = data_source.strip()
     if return_containers:
         return_containers = json.loads(return_containers.lower())
 
@@ -104,7 +104,7 @@ def search_resource_page(resource_table):
             bookmark=bookmark,
             pagination_dict=pagination_dict,
             return_containers=return_containers,
-            data_source=data_source
+            data_source=data_source,
         )
         return jsonify(resource_list)
     else:
@@ -181,7 +181,10 @@ def search_resource(resource_table):
             return_containers = json.loads(return_containers.lower())
 
         resource_list = search_resource_annotation(
-            resource_table, query, return_containers=return_containers, data_source=data_source
+            resource_table,
+            query,
+            return_containers=return_containers,
+            data_source=data_source,
         )
         return jsonify(resource_list)
     else:
@@ -214,7 +217,9 @@ def get_values_using_value(resource_table):
     if key:
         # If the key is provided it will restrict the search to the provided key.
-        return query_cashed_bucket_part_value_keys(key, value,data_source, resource_table)
+        return query_cashed_bucket_part_value_keys(
+            key, value, data_source, resource_table
+        )
 
     bookmark = request.args.get("bookmark")
     if bookmark:
         bookmark = bookmark.split(",")
@@ -245,7 +250,9 @@ def get_values_using_value(resource_table):
             )
         )
     )
-    return jsonify(search_value_for_resource(resource_table, value, data_source, bookmark))
+    return jsonify(
+        search_value_for_resource(resource_table, value, data_source, bookmark)
+    )
 
 
 @resources.route("/<resource_table>/searchvaluesusingkey/", methods=["GET"])
@@ -270,7 +277,7 @@ def search_values_for_a_key(resource_table):
         except Exception:
             csv = False
 
-    return get_key_values_return_contents(key, resource_table,data_source, csv)
+    return get_key_values_return_contents(key, resource_table, data_source, csv)
 
 
 # getannotationkeys==> keys
@@ -287,7 +294,9 @@ def get_resource_keys(resource_table):
     data_source = request.args.get("data_source")
     if data_source:
         data_source = data_source.strip()
-    resource_keys = get_resource_attributes(resource_table, data_source=data_source, mode=mode)
+    resource_keys = get_resource_attributes(
+        resource_table, data_source=data_source, mode=mode
+    )
     return jsonify(resource_keys)
 
 
@@ -327,10 +336,10 @@ def get_resource_names_(resource_table):
     value = request.args.get("value")
     description = request.args.get("use_description")
     data_source = request.args.get("data_source")
-    data_source=check_empty_string(data_source)
+    data_source = check_empty_string(data_source)
     if data_source:
-        data_source=data_source.strip(",")
-        data_source=json.dumps(data_source)
+        data_source = data_source.strip(",")
+        data_source = json.dumps(data_source)
     if description:
         if description.lower() in ["true", "false"]:
             description = json.loads(description.lower())
@@ -365,7 +374,12 @@ def submit_query_return_containers():
     validation_results = query_validator(query)
     if validation_results == "OK":
         return jsonify(
-            determine_search_results_(query, data_source=data_source, return_columns=return_columns, return_containers=True)
+            determine_search_results_(
+                query,
+                data_source=data_source,
+                return_columns=return_columns,
+                return_containers=True,
+            )
         )
     else:
         return jsonify(build_error_message(validation_results))
@@ -395,7 +409,11 @@ def submit_query():
 
     validation_results = query_validator(query)
     if validation_results == "OK":
-        return jsonify(determine_search_results_(query,data_source=data_source, return_columns=return_columns))
+        return jsonify(
+            determine_search_results_(
+                query, data_source=data_source, return_columns=return_columns
+            )
+        )
     else:
         return jsonify(build_error_message(validation_results))
@@ -412,7 +430,7 @@ def search(resource_table):
     operator = request.args.get("operator")
     bookmark = request.args.get("bookmark")
     data_source = request.args.get("data_source")
-    data_source=check_empty_string(data_source)
+    data_source = check_empty_string(data_source)
     return_containers = request.args.get("return_containers")
     if return_containers:
         return_containers = json.loads(return_containers.lower())
@@ -450,7 +468,9 @@ def container_key_values_search(resource_table):
             csv = json.loads(csv.lower())
         except Exception:
             csv = False
-    results = get_container_values_for_key(resource_table, container_name, csv, ret_data_source=data_source, key=key)
+    results = get_container_values_for_key(
+        resource_table, container_name, csv, ret_data_source=data_source, key=key
+    )
     return results
 
 
@@ -468,11 +488,13 @@ def container_keys_search(resource_table):
         return build_error_message("Container name is required")
 
     csv = request.args.get("csv")
-    data_source=request.args.get("data_source")
+    data_source = request.args.get("data_source")
     if csv:
         try:
             csv = json.loads(csv.lower())
         except Exception:
             csv = False
-    results = get_container_values_for_key(resource_table, container_name, csv, ret_data_source=data_source)
+    results = get_container_values_for_key(
+        resource_table, container_name, csv, ret_data_source=data_source
+    )
     return results
diff --git a/omero_search_engine/api/v1/resources/utils.py b/omero_search_engine/api/v1/resources/utils.py
index b11468f..ca55cb0 100644
--- a/omero_search_engine/api/v1/resources/utils.py
+++ b/omero_search_engine/api/v1/resources/utils.py
@@ -223,8 +223,8 @@ def elasticsearch_query_builder(
     for clause in main_attributes.get("and_main_attributes"):
         if isinstance(clause, list):
             for attribute in clause:
-                if attribute["operator"].strip()=="in":
-                    ## it is assuming that in operator value is a lit
+                if attribute["operator"].strip() == "in":
+                    # it is assuming that the in operator value is a list
                     main_dd = main_attribute_query_in_template.substitute(
                         attribute=attribute["name"].strip(),
                         value=json.dumps(attribute["value"]),
@@ -244,7 +244,10 @@ def elasticsearch_query_builder(
                         attribute=attribute["name"].strip(),
                         value=str(attribute["value"]).strip(),
                     )
-                if attribute["operator"].strip() == "equals" or attribute["operator"].strip() == "in":
+                if (
+                    attribute["operator"].strip() == "equals"
+                    or attribute["operator"].strip() == "in"
+                ):
                     nested_must_part.append(main_dd)
                 elif attribute["operator"].strip() == "not_equals":
                     nested_must_not_part.append(main_dd)
@@ -252,10 +255,10 @@ def elasticsearch_query_builder(
         else:
             attribute = clause
             if attribute["operator"].strip() == "in":
-                ## it is assuming that in operator value is a lit
+                # it is assuming that the in operator value is a list
                 main_dd = main_attribute_query_in_template.substitute(
                     attribute=attribute["name"].strip(),
-                    value= json.dumps(attribute["value"]),
+                    value=json.dumps(attribute["value"]),
                 )
             elif attribute["name"].endswith("_id") or attribute["name"] == "id":
                 main_dd = main_attribute_query_template_id.substitute(
@@ -267,7 +270,10 @@ def elasticsearch_query_builder(
                     attribute=attribute["name"].strip(),
                     value=str(attribute["value"]).strip(),
                 )
-            if attribute["operator"].strip() == "equals" or attribute["operator"].strip() == "in" :
+            if (
+ attribute["operator"].strip() == "equals" + or attribute["operator"].strip() == "in" + ): nested_must_part.append(main_dd) elif attribute["operator"].strip() == "not_equals": nested_must_not_part.append(main_dd) @@ -297,7 +303,6 @@ def elasticsearch_query_builder( main_dd = main_attribute_query_template.substitute( attribute=attribute["name"].strip(), value=str(attribute["value"]).strip(), - ) if attribute["operator"].strip() == "equals": @@ -1007,9 +1012,15 @@ def get_pagination(total_pages, next_bookmark, pagination_dict): def search_index_using_search_after( - e_index, query, bookmark_, pagination_dict, return_containers, data_source=None, ret_type=None + e_index, + query, + bookmark_, + pagination_dict, + return_containers, + data_source=None, + ret_type=None, ) -> object: - #toz ya + # toz ya returned_results = [] if bookmark_ and not pagination_dict: add_paination = False @@ -1027,7 +1038,7 @@ def search_index_using_search_after( attribute="data_source", value=json.dumps([data_s]), ) - #query["query"]["bool"]["must"][0] = json.loads(main_dd) + # query["query"]["bool"]["must"][0] = json.loads(main_dd) query2["query"]["bool"]["must"].append(json.loads(main_dd)) res = es.search(index=e_index, body=query2) if len(res["hits"]["hits"]) == 0: @@ -1040,7 +1051,7 @@ def search_index_using_search_after( res_res = get_studies_titles(ek["key"], ret_type, data_source) res_res["image count"] = ek["doc_count"] if data_source: - res_res["data_source"] =data_s + res_res["data_source"] = data_s returned_results.append(res_res) return returned_results @@ -1141,12 +1152,12 @@ def search_resource_annotation( return build_error_message( "{query} is not a valid query".format(query=query) ) - if data_source and data_source.lower() != "all": - data_sources=get_data_sources() - data_source = [itm.strip() for itm in data_source.split(',')] + if data_source and data_source.lower() != "all": + data_sources = get_data_sources() + data_source = [itm.strip() for itm in data_source.split(",")] for data_s in data_source: if data_s and data_s.strip().lower() not in data_sources: - return "'%s' is not a data source"%data_s + return "'%s' is not a data source" % data_s clause = {} clause["name"] = "data_source" clause["value"] = data_source @@ -1175,7 +1186,7 @@ def search_resource_annotation( if isinstance(query_string, dict): return query_string - #search_omero_app.logger.info("Query %s" % query_string) + # search_omero_app.logger.info("Query %s" % query_string) query = json.loads(query_string, strict=False) raw_query_to_send_back = json.loads(query_string, strict=False) @@ -1204,14 +1215,25 @@ def search_resource_annotation( ) res_2 = search_index_using_search_after( - res_index, query, bookmark, pagination_dict, return_containers, data_source=data_source, ret_type="screen" + res_index, + query, + bookmark, + pagination_dict, + return_containers, + data_source=data_source, + ret_type="screen", ) # Combines the containers results studies = res + res_2 res = {"results": studies} else: res = search_index_using_search_after( - res_index, query, bookmark, pagination_dict, return_containers, data_source=data_source + res_index, + query, + bookmark, + pagination_dict, + return_containers, + data_source=data_source, ) notice = "" end_time = time.time() @@ -1232,7 +1254,7 @@ def search_resource_annotation( # ) -def get_studies_titles(idr_name, resource,data_source=None): +def get_studies_titles(idr_name, resource, data_source=None): """ use the res_raw_query to return the study title (publication and study) """ @@ -1318,7 
+1340,8 @@ def get_data_sources(): data_sources.append(data_source) return data_sources + def check_empty_string(string_to_check): if string_to_check: - string_to_check=string_to_check.strip() + string_to_check = string_to_check.strip() return string_to_check diff --git a/omero_search_engine/cache_functions/elasticsearch/transform_data.py b/omero_search_engine/cache_functions/elasticsearch/transform_data.py index 58de101..4fe769e 100644 --- a/omero_search_engine/cache_functions/elasticsearch/transform_data.py +++ b/omero_search_engine/cache_functions/elasticsearch/transform_data.py @@ -588,7 +588,7 @@ def insert_resource_data_from_df(df, resource, data_source, lock=None): actions.append({"_index": es_index, "_source": record}) # , es = search_omero_app.config.get("es_connector") - #logging.getLogger("elasticsearch").setLevel(logging.ERROR) + # logging.getLogger("elasticsearch").setLevel(logging.ERROR) search_omero_app.logger.info("Pushing the data to the Elasticsearch") try: lock.acquire() @@ -732,7 +732,9 @@ def save_key_value_buckets( push_keys_cache_index( resource_keys, resource_table, data_source, es_index_2, name_results ) - logging.info(type(resource_keys), type(resource_table), es_index_2, type(name_results)) + logging.info( + type(resource_keys), type(resource_table), es_index_2, type(name_results) + ) if only_values: continue search_omero_app.logger.info( @@ -808,7 +810,7 @@ def save_key_value_buckets_process(lock, global_counter, vals): search_omero_app.logger.info(helpers.bulk(es, actions)) except Exception as e: search_omero_app.logger.info("Error: %s" % str(e)) - #raise e + # raise e raise e finally: lock.release() @@ -818,7 +820,7 @@ def save_key_value_buckets_process(lock, global_counter, vals): Error:%s " % (global_counter.value, str(e)) ) - #raise e + # raise e if wrong_keys.get(resource_table): wrong_keys[resource_table] = wrong_keys[resource_table].append(key) else: @@ -840,9 +842,7 @@ def get_keys(res_table, data_source): return results -def push_keys_cache_index( - results, resource, data_source, es_index, resourcename=None -): +def push_keys_cache_index(results, resource, data_source, es_index, resourcename=None): row = {} row["name"] = results row["doc_type"] = es_index diff --git a/omero_search_engine/database/utils.py b/omero_search_engine/database/utils.py index 220a598..b9a8862 100644 --- a/omero_search_engine/database/utils.py +++ b/omero_search_engine/database/utils.py @@ -45,13 +45,18 @@ def restore_database(source): and data_source["name"].lower() != source.lower() ): continue - backup_filename = os.path.join(mm, "app_data/%s"%data_source.get("DATABASE").get("DATABASE_BACKUP_FILE")) + backup_filename = os.path.join( + mm, "app_data/%s" % data_source.get("DATABASE").get("DATABASE_BACKUP_FILE") + ) - create_database_comand = "psql --username %s --host %s --port %s -c 'create database %s'" % ( - data_source.get("DATABASE").get("DATABASE_USER"), - data_source.get("DATABASE").get("DATABASE_SERVER_URI"), - data_source.get("DATABASE").get("DATABASE_PORT"), - data_source.get("DATABASE").get("DATABASE_NAME") + create_database_comand = ( + "psql --username %s --host %s --port %s -c 'create database %s'" + % ( + data_source.get("DATABASE").get("DATABASE_USER"), + data_source.get("DATABASE").get("DATABASE_SERVER_URI"), + data_source.get("DATABASE").get("DATABASE_PORT"), + data_source.get("DATABASE").get("DATABASE_NAME"), + ) ) print("create_database_comand: %s" % create_database_comand) @@ -63,10 +68,9 @@ def restore_database(source): "PGPASSWORD": 
data_source.get("DATABASE").get("DATABASE_PASSWORD") }, ) - stdout, stderr =proc.communicate() - + stdout, stderr = proc.communicate() - print ("Done for create %s, error %s"%(stdout,stderr)) + print("Done for create %s, error %s" % (stdout, stderr)) except Exception as e: print("Error: exception happened during create database %s" % (e)) restore_command = "psql --username %s --host %s --port %s -d %s -f %s" % ( @@ -74,7 +78,7 @@ def restore_database(source): data_source.get("DATABASE").get("DATABASE_SERVER_URI"), data_source.get("DATABASE").get("DATABASE_PORT"), data_source.get("DATABASE").get("DATABASE_NAME"), - backup_filename + backup_filename, ) print("Resore command: %s" % restore_command) try: diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index d0513f9..94fec68 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -420,7 +420,9 @@ def get_results_searchengine(self, operator=None): query_validation_res = query_validator(query_data) if query_validation_res == "OK": search_omero_app.logger.info("Getting results from search engine") - searchengine_results = determine_search_results_(query_data, data_source=self.data_source) + searchengine_results = determine_search_results_( + query_data, data_source=self.data_source + ) if searchengine_results.get("results"): size = searchengine_results.get("results").get("size") ids = [ @@ -448,7 +450,7 @@ def get_results_searchengine(self, operator=None): ) # noqa query_data_ = {"query_details": query, "bookmark": bookmark} searchengine_results_ = determine_search_results_( - query_data_,data_source=self.data_source + query_data_, data_source=self.data_source ) # noqa ids_ = [ item["id"] @@ -465,7 +467,7 @@ def get_results_searchengine(self, operator=None): "pagination": pagination_dict, } searchengine_results_ = determine_search_results_( - query_data_,data_source=self.data_source + query_data_, data_source=self.data_source ) # noqa ids_ = [ item["id"] @@ -797,7 +799,7 @@ def test_no_images(data_source): headers = lines[0] headers = headers.split("\t") - #for i in range(len(headers) - 1): + # for i in range(len(headers) - 1): # print(i, headers[i]) names = {} for line in lines: @@ -839,7 +841,7 @@ def test_no_images(data_source): and_filters = [] query = {"and_filters": and_filters, "or_filters": or_filters} query_data = {"query_details": query} - returned_results = determine_search_results_(query_data,data_source) + returned_results = determine_search_results_(query_data, data_source) if returned_results.get("results"): if returned_results.get("results").get("size"): total_results = returned_results["results"]["size"] @@ -924,6 +926,7 @@ def get_omero_stats(): with open(stats_file, "w") as f: f.write(report) + def check_number_images_sql_containers_using_ids(data_source): """ This method tests the number of images inside each container @@ -1017,10 +1020,10 @@ def check_number_images_sql_containers_using_ids(data_source): if ress["id"] not in test_array: print(ress["id"]) search_omero_app.logger.info("ERROR: Not equal results") - print (sql, query_data) - print ("searchengine_results:",searchengine_results) - print ("postgres_results: ",postgres_results) - print ("==============-=======") + print(sql, query_data) + print("searchengine_results:", searchengine_results) + print("postgres_results: ", postgres_results) + print("==============-=======") print( "Error checking %s name: %s, id: %s" % (resource, 
res_name, res_id) @@ -1033,6 +1036,7 @@ def check_number_images_sql_containers_using_ids(data_source): ) return dd + def get_no_images_sql_containers(data_source, write_report=True): """ This method tests the number of images inside each container @@ -1046,45 +1050,46 @@ def get_no_images_sql_containers(data_source, write_report=True): from omero_search_engine.api.v1.resources.utils import ( search_resource_annotation, ) - from omero_search_engine.api.v1.resources.utils import adjust_query_for_container + + # from omero_search_engine.api.v1.resources.utils import adjust_query_for_container # conn = search_omero_app.config["database_connector"] messages = [] for data_source_ in search_omero_app.config.database_connectors.keys(): - if data_source_.lower()!=data_source.lower(): + if data_source_.lower() != data_source.lower(): continue conn = search_omero_app.config.database_connectors[data_source] - all_names = get_resource_names("all",data_source=json.dumps(data_source)) - #print (all_names) - + all_names = get_resource_names("all", data_source=json.dumps(data_source)) + # print (all_names) for resource in all_names: messages.append( - "######################## Checking %s ########################\n" % resource + "######################## Checking %s ########################\n" + % resource ) for ds, res_name__ in all_names.get(resource).items(): for res_name_ in res_name__: res_name = res_name_.get("name") res_id = res_name_.get("id") - print (res_name) + print(res_name) message1 = "Checking %s name: %s" % (resource, res_name) messages.append(message1) search_omero_app.logger.info(message1) - # and_filters = [ - # { - # "name": "name", - # "id": res_id, - # "operator": "equals", - # "resource": "container", - # } - # ] - # or_filters = [] - #query = {"and_filters": and_filters, "or_filters": or_filters} - #query_data = {"query_details": query} - #adjust_query_for_container(query_data) + # and_filters = [ + # { + # "name": "name", + # "id": res_id, + # "operator": "equals", + # "resource": "container", + # } + # ] + # or_filters = [] + # query = {"and_filters": and_filters, "or_filters": or_filters} + # query_data = {"query_details": query} + # adjust_query_for_container(query_data) and_filters = [] main_attributes = { "and_main_attributes": [ @@ -1111,20 +1116,22 @@ def get_no_images_sql_containers(data_source, write_report=True): } returned_results = search_resource_annotation("image", query_data) - #returned_results = determine_search_results_(query_data, data_source=data_source) + # returned_results = determine_search_results_ + # (query_data, data_source=data_source) if returned_results.get("results"): if returned_results.get("results").get("size"): seachengine_results = returned_results["results"]["size"] else: seachengine_results = 0 message2 = ( - "No of images returned from searchengine: %s" % seachengine_results + "No of images returned from searchengine: %s" + % seachengine_results ) search_omero_app.logger.info(message2) messages.append(message2) - #sql = query_methods["%s_name" % resource].substitute( + # sql = query_methods["%s_name" % resource].substitute( # name=res_name, operator="=" - #) + # ) if resource == "project": sql = query_images_in_project_id.substitute(project_id=res_id) elif resource == "screen": @@ -1132,7 +1139,8 @@ def get_no_images_sql_containers(data_source, write_report=True): results = conn.execute_query(sql) postgres_results = len(results) message3 = ( - "Number of images returned from the database: %s" % postgres_results + "Number of images returned 
from the database: %s" + % postgres_results ) messages.append(message3) search_omero_app.logger.info(message3) diff --git a/unit_tests/test_app.py b/unit_tests/test_app.py index 77fe43b..cfb4f25 100644 --- a/unit_tests/test_app.py +++ b/unit_tests/test_app.py @@ -36,7 +36,7 @@ from omero_search_engine.validation.results_validator import ( Validator, - check_number_images_sql_containers_using_ids, + # check_number_images_sql_containers_using_ids, ) from omero_search_engine.cache_functions.elasticsearch.transform_data import ( delete_es_index, @@ -114,15 +114,20 @@ def test_query_database(self): data_source ].execute_query(sql) self.assertIsNotNone(res) - found_db_name=False + found_db_name = False for source in search_omero_app.config.get("DATA_SOURCES"): - if source.get("DATABASE").get("DATABASE_NAME")==res[0]["current_database"]: - found_db_name=True + if ( + source.get("DATABASE").get("DATABASE_NAME") + == res[0]["current_database"] + ): + found_db_name = True break self.assertTrue(found_db_name) - #self.assertEqual(res[0]["current_database"], search_omero_app.config.database_connectors[data_source]["DATABASE_NAME"]) - #self.assertEqual(res[0]["current_database"], search_omero_app.config.database_connectors[data_source]["DATABASE_NAME"]) + # self.assertEqual(res[0]["current_database"], + # search_omero_app.config.database_connectors[data_source]["DATABASE_NAME"]) + # self.assertEqual(res[0]["current_database"], + # search_omero_app.config.database_connectors[data_source]["DATABASE_NAME"]) def validate_json_syntax(self, json_template): try: @@ -228,9 +233,9 @@ def test_or_query(self): ) self.assertTrue(validator.identical) -# def test_no_images_containers(self): -# for data_source in search_omero_app.config.database_connectors.keys(): -# self.assertTrue(check_number_images_sql_containers_using_ids(data_source)) + # def test_no_images_containers(self): + # for data_source in search_omero_app.config.database_connectors.keys(): + # self.assertTrue(check_number_images_sql_containers_using_ids(data_source)) def test_multi_or_quries(self): pass