From 1d798eca9581df0a378aa9cae8eae2a5440abaed Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Sat, 21 Jan 2023 17:56:55 +0000 Subject: [PATCH 01/28] Get container keys, and get values for a key in a container --- examples/conatiner_key_values.py | 54 +++++++++++ .../api/v1/resources/resource_analyser.py | 95 +++++++++++++++++++ .../resources/swagger_docs/container_keys.yml | 20 ++++ .../swagger_docs/container_keyvalues.yml | 25 +++++ .../submitquery_returncontainers.yml | 2 +- omero_search_engine/api/v1/resources/urls.py | 36 +++++++ .../validation/psql_templates.py | 35 +++++++ .../validation/results_validator.py | 44 +++++++++ 8 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 examples/conatiner_key_values.py create mode 100644 omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml create mode 100644 omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml diff --git a/examples/conatiner_key_values.py b/examples/conatiner_key_values.py new file mode 100644 index 00000000..c5ce3c11 --- /dev/null +++ b/examples/conatiner_key_values.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright (C) 2023 University of Dundee & Open Microscopy Environment. +# All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +from utils import base_url +import requests, json +import logging + +''' +Return the available keys in a containers +Also get the aviable values for a key ''' + +resource="image" +container_name="idr0034" +key="cell line" + +#the following url will return the existing key in this container +keys_url = "{base_url}resources/image/container_keys/?container_name={container_name}".format( + base_url=base_url, container_name=container_name) + +resp = requests.get(url=keys_url) +keys_results = json.loads(resp.text) +for result in keys_results: + logging.info ("%s: %s"%(result.get("type"), result.get("name"))) + for bucket in result.get("results"): + logging.info ("Key: %s, no of images: %s "%(bucket.get("key"), bucket.get("no_image"))) + +#It is possible to get all the available values for a key +values_key_url="{base_url}resources/image/container_keyvalues/?container_name={container_name}&key={key}".format( + base_url=base_url, container_name=container_name, key=key) + +resp = requests.get(url=values_key_url) + +key_values_results = json.loads(resp.text) + +for result in key_values_results: + logging.info ("%s: %s"%(result.get("type"), result.get("name"))) + for bucket in result.get("results"): + logging.info ("Key: %s, value: %s, no of images: %s "%(bucket.get("key"),bucket.get("value"), bucket.get("no_image"))) diff --git a/omero_search_engine/api/v1/resources/resource_analyser.py b/omero_search_engine/api/v1/resources/resource_analyser.py index 01474193..db680203 100644 --- a/omero_search_engine/api/v1/resources/resource_analyser.py +++ b/omero_search_engine/api/v1/resources/resource_analyser.py @@ -801,3 +801,98 @@ def get_the_results(resource, name, description, es_index="key_values_resource_c del item["description"] return returned_results + + +def get_container_values_for_key(table_, container_name, key=None): + key_number_search_template + retuned_results = [] + pr_names = get_resource_names("all") + for resourse, names in pr_names.items(): + act_name = [ + {"id": name["id"], "name": name["name"]} + for name in names + if name["name"] and container_name.lower() in name["name"].lower() + ] + if len(act_name) > 0: + for id in act_name: + if resourse != table_: + res = process_container_query( + table_, resourse + "_id", id["id"], key, table_ + ) + else: + res = process_container_query(table_, "id", id["id"], key, table_) + if len(res) > 0: + retuned_results.append( + {"name": id["name"], "type": resourse, "results": res} + ) + return retuned_results + + +def process_container_query(table_, attribute_name, container_id, key, resourse): + from omero_search_engine.api.v1.resources.utils import elasticsearch_query_builder + + res_index = resource_elasticsearchindex.get(table_) + main_attributes = { + "and_main_attributes": [ + {"name": attribute_name, "value": container_id, "operator": "equals"} + ] + } + query_ = elasticsearch_query_builder([], [], False, main_attributes=main_attributes) + query = json.loads(query_) + if key: + query["aggs"] = json.loads( + container_project_values_key_template.substitute(key=key.strip()) + ) + else: + query["aggs"] = container_project_keys_template + query["_source"] = {"includes": [""]} + res = search_index_for_value(res_index, query) + if key: + buckets = res["aggregations"]["key_values"]["key_filter"]["uniquesTerms"][ + "buckets" + ] + for bucket in buckets: + bucket["value"] = bucket["key"] + bucket["key"] = key + bucket["no_" + resourse] = bucket["doc_count"] + del bucket["doc_count"] + return buckets + + else: + buckets = res["aggregations"]["keys_search"]["uniquesTerms"]["buckets"] + for bucket in buckets: + bucket["no_" + resourse] = bucket["doc_count"] + del bucket["doc_count"] + return buckets + + +"""' +get all the values buckets for a key""" +container_project_values_key_template = Template( + """{"key_values":{"nested":{"path":"key_values"},"aggs":{"key_filter":{ + "filter":{"terms":{"key_values.name.keynamenormalize":["$key"]} + },"aggs":{"required_values":{"cardinality": + {"field": "key_values.value.keyvalue", + "precision_threshold":4000}},"uniquesTerms": + {"terms": {"field": "key_values.value.keyvalue","size": 10000}}}}}}}""" +) + + +""" +Get all the keys bucket""" +container_project_keys_template = { + "keys_search": { + "nested": {"path": "key_values"}, + "aggs": { + "required_values": { + "cardinality": { + "field": "key_values.name.keynamenormalize", + "precision_threshold": 4000, + }, + }, + "uniquesTerms": { + "terms": {"field": "key_values.name.keynamenormalize", "size": 10000} + }, + }, + } +} diff --git a/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml b/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml new file mode 100644 index 00000000..6b0f5866 --- /dev/null +++ b/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml @@ -0,0 +1,20 @@ +A searchengine endpoint to return the avilable keys for a container +--- +tags: + - Container keys +parameters: + - name: resource_table + in: path + type: string + enum: ['image', 'project', 'screen', 'well', 'plate'] + required: true + - name: container_name + description: The container name + in: query + type: string + required: true +responses: + 200: + description: A JSON contains the search results + examples: + results: [] diff --git a/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml b/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml new file mode 100644 index 00000000..f6082a1b --- /dev/null +++ b/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml @@ -0,0 +1,25 @@ +A searchengine endpoint to return the avialbe values for a key in specific container +--- +tags: + - Avialbe values for a key in specific container +parameters: + - name: resource_table + in: path + type: string + enum: ['image', 'project', 'screen', 'well', 'plate'] + required: true + - name: container_name + description: The container name + in: query + type: string + required: true + - name: key + description: the resource attribute + in: query + type: string + required: true +responses: + 200: + description: A JSON contains the search results + examples: + results: [] diff --git a/omero_search_engine/api/v1/resources/swagger_docs/submitquery_returncontainers.yml b/omero_search_engine/api/v1/resources/swagger_docs/submitquery_returncontainers.yml index 5aab3e29..968ccfcf 100644 --- a/omero_search_engine/api/v1/resources/swagger_docs/submitquery_returncontainers.yml +++ b/omero_search_engine/api/v1/resources/swagger_docs/submitquery_returncontainers.yml @@ -50,7 +50,7 @@ Another example: --- tags: - - Mixed Complex query + - Return containers for a Mixed Complex query parameters: - name: return_columns diff --git a/omero_search_engine/api/v1/resources/urls.py b/omero_search_engine/api/v1/resources/urls.py index df8c9bba..52ae83df 100644 --- a/omero_search_engine/api/v1/resources/urls.py +++ b/omero_search_engine/api/v1/resources/urls.py @@ -387,3 +387,39 @@ def search(resource_table): return_containers, ) return jsonify(results) + + +@resources.route("//container_keyvalues/", methods=["GET"]) +def container_key_values_search(resource_table): + """ + file: swagger_docs/container_keyvalues.yml + """ + from omero_search_engine.api.v1.resources.resource_analyser import ( + get_container_values_for_key, + ) + + key = request.args.get("key") + container_name = request.args.get("container_name") + if not container_name or not key: + return build_error_message("Container name and key are required") + results = get_container_values_for_key(resource_table, container_name, key) + return jsonify(results) + + +@resources.route("//container_keys/", methods=["GET"]) +def container_keys_search(resource_table): + """ + file: swagger_docs/container_keys.yml + """ + from omero_search_engine.api.v1.resources.resource_analyser import ( + get_container_values_for_key, + ) + + container_name = request.args.get("container_name") + if not container_name: + return build_error_message("Container name is required") + results = get_container_values_for_key( + resource_table, + container_name, + ) + return jsonify(results) diff --git a/omero_search_engine/validation/psql_templates.py b/omero_search_engine/validation/psql_templates.py index cabec123..36168d97 100644 --- a/omero_search_engine/validation/psql_templates.py +++ b/omero_search_engine/validation/psql_templates.py @@ -178,3 +178,38 @@ def substitute(self, **kwargs): where lower(annotation_mapvalue.name)=lower('$key') and lower(annotation_mapvalue.value) =lower('$value')""" ) + +project_key_values = Template( + """ +Select DISTINCT ( annotation_mapvalue.value) from image +left join imageannotationlink on image.id =imageannotationlink.parent +left join annotation_mapvalue on +annotation_mapvalue.annotation_id=imageannotationlink.child +inner join datasetimagelink on datasetimagelink.child=image.id +inner join dataset on datasetimagelink.parent=dataset.id +inner join projectdatasetlink on dataset.id=projectdatasetlink.child +inner join project on project.id=projectdatasetlink.parent +where project.id= $id and lower (annotation_mapvalue.name) =lower('$name'); +""" +) + +screen_key_values = Template( + """ +Select DISTINCT (annotation_mapvalue.value) from image +left join imageannotationlink on image.id =imageannotationlink.parent +left join annotation_mapvalue on +annotation_mapvalue.annotation_id=imageannotationlink.child +inner join wellsample on wellsample.image=image.id +inner join well on wellsample.well= well.id +inner join plate on well.plate=plate.id +inner join screenplatelink on plate.id=screenplatelink.child +inner join screen on screen.id=screenplatelink.parent +where screen.id =$id and lower (annotation_mapvalue.name) =lower('$name'); + """ +) + +container_from_name = Template( + """ +select id from $container where name like '%$name%'; +""" +) diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index eabb2130..4dc23212 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -755,3 +755,47 @@ def get_no_images_sql_containers(): report = "\n".join(messages) # noqa with open(report_file, "w") as f: f.write(report) + + +def container_keys_vakues(): + from omero_search_engine.validation.psql_templates import ( + container_from_name, + screen_key_values, + project_key_values, + ) + from omero_search_engine.api.v1.resources.resource_analyser import ( + get_container_values_for_key, + ) + + container_name = "idr0034" + key = "Cell Line" + project_sql = container_from_name.substitute( + container="project", name=container_name + ) + screen_sql = container_from_name.substitute(container="screen", name=container_name) + conn = search_omero_app.config["database_connector"] + project_ids_results = conn.execute_query(project_sql) + screen_ids_results = conn.execute_query(screen_sql) + print(project_ids_results) + print(screen_ids_results) + if len(screen_ids_results) > 0: + for id in screen_ids_results: + screen_sql = screen_key_values.substitute(id=id.get("id"), name=key) + screen_results = conn.execute_query(screen_sql) + scr_searchengine_results = get_container_values_for_key( + "image", container_name, key + ) + print(scr_searchengine_results) + print(len(screen_results)) + print(len(screen_results), len(scr_searchengine_results[0].get("results"))) + + if len(screen_ids_results) > 0: + for id in screen_ids_results: + project_sql = project_key_values.substitute(id=id.get("id"), name=key) + project_results = conn.execute_query(project_sql) + pr_searchengine_results = get_container_values_for_key( + "image", container_name, key + ) + print(pr_searchengine_results) + print(len(project_results)) + print(len(project_results), len(pr_searchengine_results[0].get("results"))) From d5a53c2b3d77a986b33753c68bb64880f71aa349 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Sun, 22 Jan 2023 20:13:30 +0000 Subject: [PATCH 02/28] add method to check conainer ket and values --- manage.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/manage.py b/manage.py index 0d4c9ca0..505eed86 100644 --- a/manage.py +++ b/manage.py @@ -333,5 +333,12 @@ def restore_elasticsearch_data(): restore_indices_data() +@manager.command +def check_containers(): + from omero_search_engine.validation.results_validator import container_keys_vakues + + container_keys_vakues() + + if __name__ == "__main__": manager.run() From cbb96768fd416fb3cdce2bb7dc89c8ac2458b1fb Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Sun, 22 Jan 2023 20:20:59 +0000 Subject: [PATCH 03/28] Fix pre commit format --- examples/conatiner_key_values.py | 47 +++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/examples/conatiner_key_values.py b/examples/conatiner_key_values.py index c5ce3c11..d9eb836a 100644 --- a/examples/conatiner_key_values.py +++ b/examples/conatiner_key_values.py @@ -18,37 +18,52 @@ # along with this program. If not, see . from utils import base_url -import requests, json +import requests +import json import logging -''' +""" Return the available keys in a containers -Also get the aviable values for a key ''' +Also get the aviable values for a key """ -resource="image" -container_name="idr0034" -key="cell line" +resource = "image" +container_name = "idr0034" +key = "cell line" -#the following url will return the existing key in this container -keys_url = "{base_url}resources/image/container_keys/?container_name={container_name}".format( - base_url=base_url, container_name=container_name) +# the following url will return the existing key in this container +keys_url = ( + "{base_url}resources/image/container_keys/?container_name={container_name}".format( + base_url=base_url, container_name=container_name + ) +) resp = requests.get(url=keys_url) keys_results = json.loads(resp.text) for result in keys_results: - logging.info ("%s: %s"%(result.get("type"), result.get("name"))) + logging.info("%s: %s" % (result.get("type"), result.get("name"))) for bucket in result.get("results"): - logging.info ("Key: %s, no of images: %s "%(bucket.get("key"), bucket.get("no_image"))) + logging.info( + "Key: %s, no of images: %s " % (bucket.get("key"), bucket.get("no_image")) + ) -#It is possible to get all the available values for a key -values_key_url="{base_url}resources/image/container_keyvalues/?container_name={container_name}&key={key}".format( - base_url=base_url, container_name=container_name, key=key) +""" It is possible to get all the available +values for a key +""" +values_key_url = ( + "{base_url}resources/image/" + "container_keyvalues/?container_name={container_name}&key={key}".format( + base_url=base_url, container_name=container_name, key=key + ) +) resp = requests.get(url=values_key_url) key_values_results = json.loads(resp.text) for result in key_values_results: - logging.info ("%s: %s"%(result.get("type"), result.get("name"))) + logging.info("%s: %s" % (result.get("type"), result.get("name"))) for bucket in result.get("results"): - logging.info ("Key: %s, value: %s, no of images: %s "%(bucket.get("key"),bucket.get("value"), bucket.get("no_image"))) + logging.info( + "Key: %s, value: %s, no of images: %s " + % (bucket.get("key"), bucket.get("value"), bucket.get("no_image")) + ) From 01f1de2c8ade3013f69661c02ab051ead1178bb3 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Fri, 27 Jan 2023 18:40:05 +0000 Subject: [PATCH 04/28] Update test container key values --- manage.py | 8 ++-- .../validation/results_validator.py | 42 +++++++++++++------ 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/manage.py b/manage.py index 505eed86..f13b9201 100644 --- a/manage.py +++ b/manage.py @@ -334,10 +334,12 @@ def restore_elasticsearch_data(): @manager.command -def check_containers(): - from omero_search_engine.validation.results_validator import container_keys_vakues +def test_container_key_value(): + from omero_search_engine.validation.results_validator import ( + check_container_keys_vakues, + ) - container_keys_vakues() + check_container_keys_vakues() if __name__ == "__main__": diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index 4dc23212..3e3f5eee 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -757,7 +757,7 @@ def get_no_images_sql_containers(): f.write(report) -def container_keys_vakues(): +def check_container_keys_vakues(): from omero_search_engine.validation.psql_templates import ( container_from_name, screen_key_values, @@ -768,7 +768,7 @@ def container_keys_vakues(): ) container_name = "idr0034" - key = "Cell Line" + key = "cell line" project_sql = container_from_name.substitute( container="project", name=container_name ) @@ -776,8 +776,8 @@ def container_keys_vakues(): conn = search_omero_app.config["database_connector"] project_ids_results = conn.execute_query(project_sql) screen_ids_results = conn.execute_query(screen_sql) - print(project_ids_results) - print(screen_ids_results) + search_omero_app.logger.info("projects: %s" % project_ids_results) + search_omero_app.logger.info("screens: %s" % screen_ids_results) if len(screen_ids_results) > 0: for id in screen_ids_results: screen_sql = screen_key_values.substitute(id=id.get("id"), name=key) @@ -785,17 +785,35 @@ def container_keys_vakues(): scr_searchengine_results = get_container_values_for_key( "image", container_name, key ) - print(scr_searchengine_results) - print(len(screen_results)) - print(len(screen_results), len(scr_searchengine_results[0].get("results"))) + search_omero_app.logger.info( + "Results from PostgreSQL database: %s" % len(screen_results) + ) + if len(scr_searchengine_results) > 0 and scr_searchengine_results[0].get( + "results" + ): + search_omero_app.logger.info( + "Searchengine results: %s" + % len(scr_searchengine_results[0].get("results")) + ) + else: + search_omero_app.logger.info("No results returned from searchengine") - if len(screen_ids_results) > 0: - for id in screen_ids_results: + if len(project_ids_results) > 0: + for id in project_ids_results: project_sql = project_key_values.substitute(id=id.get("id"), name=key) project_results = conn.execute_query(project_sql) pr_searchengine_results = get_container_values_for_key( "image", container_name, key ) - print(pr_searchengine_results) - print(len(project_results)) - print(len(project_results), len(pr_searchengine_results[0].get("results"))) + search_omero_app.logger.info( + "Results from PostgreSQL database: %s" % len(project_results) + ) + if len(pr_searchengine_results) > 0 and pr_searchengine_results[0].get( + "results" + ): + search_omero_app.logger.info( + "Searchengine results: %s " + % len(pr_searchengine_results[0].get("results")) + ) + else: + search_omero_app.logger.info("No results returned from searchengine") From 81e90beeb69b9b02430c5a0c26ac8b9f5240fca6 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Sat, 21 Jan 2023 17:56:55 +0000 Subject: [PATCH 05/28] Get container keys, and get values for a key in a container --- omero_search_engine/validation/results_validator.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index 3e3f5eee..1e2fb6e8 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -756,7 +756,6 @@ def get_no_images_sql_containers(): with open(report_file, "w") as f: f.write(report) - def check_container_keys_vakues(): from omero_search_engine.validation.psql_templates import ( container_from_name, @@ -768,7 +767,9 @@ def check_container_keys_vakues(): ) container_name = "idr0034" + key = "cell line" + project_sql = container_from_name.substitute( container="project", name=container_name ) @@ -776,8 +777,10 @@ def check_container_keys_vakues(): conn = search_omero_app.config["database_connector"] project_ids_results = conn.execute_query(project_sql) screen_ids_results = conn.execute_query(screen_sql) + search_omero_app.logger.info("projects: %s" % project_ids_results) search_omero_app.logger.info("screens: %s" % screen_ids_results) + if len(screen_ids_results) > 0: for id in screen_ids_results: screen_sql = screen_key_values.substitute(id=id.get("id"), name=key) @@ -785,11 +788,12 @@ def check_container_keys_vakues(): scr_searchengine_results = get_container_values_for_key( "image", container_name, key ) + search_omero_app.logger.info( "Results from PostgreSQL database: %s" % len(screen_results) ) if len(scr_searchengine_results) > 0 and scr_searchengine_results[0].get( - "results" + "results" ): search_omero_app.logger.info( "Searchengine results: %s" @@ -800,16 +804,18 @@ def check_container_keys_vakues(): if len(project_ids_results) > 0: for id in project_ids_results: + project_sql = project_key_values.substitute(id=id.get("id"), name=key) project_results = conn.execute_query(project_sql) pr_searchengine_results = get_container_values_for_key( "image", container_name, key ) + search_omero_app.logger.info( "Results from PostgreSQL database: %s" % len(project_results) ) if len(pr_searchengine_results) > 0 and pr_searchengine_results[0].get( - "results" + "results" ): search_omero_app.logger.info( "Searchengine results: %s " From d5701feffa9131f7ba3a6f906816884f29af1065 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Sun, 22 Jan 2023 20:13:30 +0000 Subject: [PATCH 06/28] add method to check conainer ket and values --- manage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/manage.py b/manage.py index f13b9201..2a8ec761 100644 --- a/manage.py +++ b/manage.py @@ -342,5 +342,6 @@ def test_container_key_value(): check_container_keys_vakues() + if __name__ == "__main__": manager.run() From 127c4f4d955a417e0666b97a7fb293cdd84527dd Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Fri, 27 Jan 2023 19:05:10 +0000 Subject: [PATCH 07/28] Fix pre commit fix --- manage.py | 1 - omero_search_engine/validation/results_validator.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/manage.py b/manage.py index 2a8ec761..f13b9201 100644 --- a/manage.py +++ b/manage.py @@ -342,6 +342,5 @@ def test_container_key_value(): check_container_keys_vakues() - if __name__ == "__main__": manager.run() diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index 1e2fb6e8..13a5fb1b 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -756,6 +756,7 @@ def get_no_images_sql_containers(): with open(report_file, "w") as f: f.write(report) + def check_container_keys_vakues(): from omero_search_engine.validation.psql_templates import ( container_from_name, @@ -793,7 +794,7 @@ def check_container_keys_vakues(): "Results from PostgreSQL database: %s" % len(screen_results) ) if len(scr_searchengine_results) > 0 and scr_searchengine_results[0].get( - "results" + "results" ): search_omero_app.logger.info( "Searchengine results: %s" @@ -815,7 +816,7 @@ def check_container_keys_vakues(): "Results from PostgreSQL database: %s" % len(project_results) ) if len(pr_searchengine_results) > 0 and pr_searchengine_results[0].get( - "results" + "results" ): search_omero_app.logger.info( "Searchengine results: %s " From 685913b57de9b597f85c8c1374c6684fc05ad5fd Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Tue, 31 Jan 2023 12:02:58 +0000 Subject: [PATCH 08/28] adding the option to generate a CSV file --- .../api/v1/resources/resource_analyser.py | 47 ++++++- .../resources/swagger_docs/container_keys.yml | 5 + .../swagger_docs/container_keyvalues.yml | 6 + omero_search_engine/api/v1/resources/urls.py | 24 +++- .../validation/results_validator.py | 126 ++++++++++-------- 5 files changed, 143 insertions(+), 65 deletions(-) diff --git a/omero_search_engine/api/v1/resources/resource_analyser.py b/omero_search_engine/api/v1/resources/resource_analyser.py index db680203..02c9e830 100644 --- a/omero_search_engine/api/v1/resources/resource_analyser.py +++ b/omero_search_engine/api/v1/resources/resource_analyser.py @@ -28,6 +28,7 @@ adjust_value, ) import math +from flask import jsonify, Response key_number_search_template = Template( """ @@ -375,8 +376,6 @@ def prepare_search_results_buckets(results_): def get_key_values_return_contents(name, resource, csv): - from flask import jsonify, Response - resource_keys = query_cashed_bucket(name, resource) # if a csv flag is true thenm iut will send a CSV file # which contains the results otherwise it will return a JSON file @@ -803,8 +802,7 @@ def get_the_results(resource, name, description, es_index="key_values_resource_c return returned_results -def get_container_values_for_key(table_, container_name, key=None): - key_number_search_template +def get_container_values_for_key(table_, container_name, csv, key=None): retuned_results = [] pr_names = get_resource_names("all") for resourse, names in pr_names.items(): @@ -825,7 +823,46 @@ def get_container_values_for_key(table_, container_name, key=None): retuned_results.append( {"name": id["name"], "type": resourse, "results": res} ) - return retuned_results + if csv: + if key: + contanets = [ + ",".join(["Container", "Type", "Key", "Value", "No of %s" % table_]) + ] + else: + contanets = [",".join(["Container", "Type", "Key", "No of %s" % table_])] + for r_results in retuned_results: + reso = r_results.get("name") + type = r_results.get("type") + for res in r_results.get("results"): + if key: + contanets.append( + ",".join( + [ + reso, + type, + res.get("key"), + res.get("value"), + str(res.get("no_%s" % table_)), + ] + ) + ) + else: + contanets.append( + ",".join( + [reso, type, res.get("key"), str(res.get("no_%s" % table_))] + ) + ) + if key: + file_name = "container_%s_%s_values.csv" % (container_name, key) + else: + file_name = "container_%s_keys.csv" % container_name + + return Response( + "\n".join(contanets), + mimetype="text/csv", + headers={"Content-disposition": "attachment; filename=%s" % (file_name)}, + ) + return jsonify(retuned_results) def process_container_query(table_, attribute_name, container_id, key, resourse): diff --git a/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml b/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml index 6b0f5866..062666b6 100644 --- a/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml +++ b/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml @@ -13,6 +13,11 @@ parameters: in: query type: string required: true + - name: csv + description: a flag to return a CSV file which is created on the fly instead of JSON + in: query + type: boolean + required: false responses: 200: description: A JSON contains the search results diff --git a/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml b/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml index f6082a1b..88a085c5 100644 --- a/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml +++ b/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml @@ -18,6 +18,12 @@ parameters: in: query type: string required: true + - name: csv + description: a flag to return a CSV file which is created on the fly instead of JSON + in: query + type: boolean + required: false + responses: 200: description: A JSON contains the search results diff --git a/omero_search_engine/api/v1/resources/urls.py b/omero_search_engine/api/v1/resources/urls.py index 52ae83df..77a3f037 100644 --- a/omero_search_engine/api/v1/resources/urls.py +++ b/omero_search_engine/api/v1/resources/urls.py @@ -402,8 +402,14 @@ def container_key_values_search(resource_table): container_name = request.args.get("container_name") if not container_name or not key: return build_error_message("Container name and key are required") - results = get_container_values_for_key(resource_table, container_name, key) - return jsonify(results) + csv = request.args.get("csv") + if csv: + try: + csv = json.loads(csv.lower()) + except Exception: + csv = False + results = get_container_values_for_key(resource_table, container_name, csv, key) + return results @resources.route("//container_keys/", methods=["GET"]) @@ -418,8 +424,12 @@ def container_keys_search(resource_table): container_name = request.args.get("container_name") if not container_name: return build_error_message("Container name is required") - results = get_container_values_for_key( - resource_table, - container_name, - ) - return jsonify(results) + + csv = request.args.get("csv") + if csv: + try: + csv = json.loads(csv.lower()) + except Exception: + csv = False + results = get_container_values_for_key(resource_table, container_name, csv) + return results diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index 13a5fb1b..9cb02345 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -763,64 +763,84 @@ def check_container_keys_vakues(): screen_key_values, project_key_values, ) + import json from omero_search_engine.api.v1.resources.resource_analyser import ( get_container_values_for_key, ) - container_name = "idr0034" - - key = "cell line" - - project_sql = container_from_name.substitute( - container="project", name=container_name - ) - screen_sql = container_from_name.substitute(container="screen", name=container_name) - conn = search_omero_app.config["database_connector"] - project_ids_results = conn.execute_query(project_sql) - screen_ids_results = conn.execute_query(screen_sql) - - search_omero_app.logger.info("projects: %s" % project_ids_results) - search_omero_app.logger.info("screens: %s" % screen_ids_results) - - if len(screen_ids_results) > 0: - for id in screen_ids_results: - screen_sql = screen_key_values.substitute(id=id.get("id"), name=key) - screen_results = conn.execute_query(screen_sql) - scr_searchengine_results = get_container_values_for_key( - "image", container_name, key + csv = False + container_names = ["idr0034", "idr0114"] + keys = ["gene symbol", "cell line"] + for container_name in container_names: + for key in keys: + project_sql = container_from_name.substitute( + container="project", name=container_name ) - - search_omero_app.logger.info( - "Results from PostgreSQL database: %s" % len(screen_results) + screen_sql = container_from_name.substitute( + container="screen", name=container_name ) - if len(scr_searchengine_results) > 0 and scr_searchengine_results[0].get( - "results" - ): - search_omero_app.logger.info( - "Searchengine results: %s" - % len(scr_searchengine_results[0].get("results")) - ) - else: - search_omero_app.logger.info("No results returned from searchengine") - - if len(project_ids_results) > 0: - for id in project_ids_results: + conn = search_omero_app.config["database_connector"] + project_ids_results = conn.execute_query(project_sql) + screen_ids_results = conn.execute_query(screen_sql) + + search_omero_app.logger.info("projects: %s" % project_ids_results) + search_omero_app.logger.info("screens: %s" % screen_ids_results) + + if len(screen_ids_results) > 0: + for id in screen_ids_results: + screen_sql = screen_key_values.substitute(id=id.get("id"), name=key) + screen_results = conn.execute_query(screen_sql) + scr_searchengine_results = get_container_values_for_key( + "image", container_name, csv, key + ) + if len(scr_searchengine_results.response) > 0: + scr_searchengine_results = json.loads( + scr_searchengine_results.response[0] + ) + else: + scr_searchengine_results = scr_searchengine_results.response + search_omero_app.logger.info( + "Results from PostgreSQL database: %s" % len(screen_results) + ) + if len(scr_searchengine_results) > 0 and scr_searchengine_results[ + 0 + ].get("results"): + search_omero_app.logger.info( + "Searchengine results: %s" + % len(scr_searchengine_results[0].get("results")) + ) + else: + search_omero_app.logger.info( + "No results returned from searchengine" + ) + if len(project_ids_results) > 0: + for id in project_ids_results: - project_sql = project_key_values.substitute(id=id.get("id"), name=key) - project_results = conn.execute_query(project_sql) - pr_searchengine_results = get_container_values_for_key( - "image", container_name, key - ) + project_sql = project_key_values.substitute( + id=id.get("id"), name=key + ) + project_results = conn.execute_query(project_sql) + pr_searchengine_results = get_container_values_for_key( + "image", container_name, csv, key + ) + if len(pr_searchengine_results.response) > 0: + pr_searchengine_results = json.loads( + pr_searchengine_results.response[0] + ) + else: + pr_searchengine_results = pr_searchengine_results.response - search_omero_app.logger.info( - "Results from PostgreSQL database: %s" % len(project_results) - ) - if len(pr_searchengine_results) > 0 and pr_searchengine_results[0].get( - "results" - ): - search_omero_app.logger.info( - "Searchengine results: %s " - % len(pr_searchengine_results[0].get("results")) - ) - else: - search_omero_app.logger.info("No results returned from searchengine") + search_omero_app.logger.info( + "Results from PostgreSQL database: %s" % len(project_results) + ) + if len(pr_searchengine_results) > 0 and pr_searchengine_results[ + 0 + ].get("results"): + search_omero_app.logger.info( + "Searchengine results: %s " + % len(pr_searchengine_results[0].get("results")) + ) + else: + search_omero_app.logger.info( + "No results returned from searchengine" + ) From 525b4261bb3f958e11e3357ccd2f3b7893b75205 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Tue, 31 Jan 2023 17:52:03 +0000 Subject: [PATCH 09/28] add comments --- omero_search_engine/validation/results_validator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index 9cb02345..a56d1811 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -758,6 +758,8 @@ def get_no_images_sql_containers(): def check_container_keys_vakues(): + # the will be modified and the tesing data will be adjusted and provided + # at ruun time from omero_search_engine.validation.psql_templates import ( container_from_name, screen_key_values, From d7c697207ed451bcb6e82b91a3a25f4a5f8e3f1d Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 6 Feb 2023 12:00:34 +0000 Subject: [PATCH 10/28] Fix typo --- .../api/v1/resources/resource_analyser.py | 20 +++++++++---------- .../resources/swagger_docs/container_keys.yml | 2 +- .../swagger_docs/container_keyvalues.yml | 4 ++-- .../validation/psql_templates.py | 2 +- .../validation/results_validator.py | 4 ++-- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/omero_search_engine/api/v1/resources/resource_analyser.py b/omero_search_engine/api/v1/resources/resource_analyser.py index 02c9e830..877a3bad 100644 --- a/omero_search_engine/api/v1/resources/resource_analyser.py +++ b/omero_search_engine/api/v1/resources/resource_analyser.py @@ -803,7 +803,7 @@ def get_the_results(resource, name, description, es_index="key_values_resource_c def get_container_values_for_key(table_, container_name, csv, key=None): - retuned_results = [] + returned_results = [] pr_names = get_resource_names("all") for resourse, names in pr_names.items(): act_name = [ @@ -820,22 +820,22 @@ def get_container_values_for_key(table_, container_name, csv, key=None): else: res = process_container_query(table_, "id", id["id"], key, table_) if len(res) > 0: - retuned_results.append( + returned_results.append( {"name": id["name"], "type": resourse, "results": res} ) if csv: if key: - contanets = [ + containers = [ ",".join(["Container", "Type", "Key", "Value", "No of %s" % table_]) ] else: - contanets = [",".join(["Container", "Type", "Key", "No of %s" % table_])] - for r_results in retuned_results: + containers = [",".join(["Container", "Type", "Key", "No of %s" % table_])] + for r_results in returned_results: reso = r_results.get("name") type = r_results.get("type") for res in r_results.get("results"): if key: - contanets.append( + containers.append( ",".join( [ reso, @@ -847,7 +847,7 @@ def get_container_values_for_key(table_, container_name, csv, key=None): ) ) else: - contanets.append( + containers.append( ",".join( [reso, type, res.get("key"), str(res.get("no_%s" % table_))] ) @@ -858,11 +858,11 @@ def get_container_values_for_key(table_, container_name, csv, key=None): file_name = "container_%s_keys.csv" % container_name return Response( - "\n".join(contanets), + "\n".join(containers), mimetype="text/csv", headers={"Content-disposition": "attachment; filename=%s" % (file_name)}, ) - return jsonify(retuned_results) + return jsonify(returned_results) def process_container_query(table_, attribute_name, container_id, key, resourse): @@ -903,7 +903,7 @@ def process_container_query(table_, attribute_name, container_id, key, resourse) return buckets -"""' +""" get all the values buckets for a key""" container_project_values_key_template = Template( """{"key_values":{"nested":{"path":"key_values"},"aggs":{"key_filter":{ diff --git a/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml b/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml index 062666b6..db44eeb3 100644 --- a/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml +++ b/omero_search_engine/api/v1/resources/swagger_docs/container_keys.yml @@ -1,4 +1,4 @@ -A searchengine endpoint to return the avilable keys for a container +A searchengine endpoint to return the available keys for a container --- tags: - Container keys diff --git a/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml b/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml index 88a085c5..4b21c6d8 100644 --- a/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml +++ b/omero_search_engine/api/v1/resources/swagger_docs/container_keyvalues.yml @@ -1,7 +1,7 @@ -A searchengine endpoint to return the avialbe values for a key in specific container +A searchengine endpoint to return the available values for a key in specific container --- tags: - - Avialbe values for a key in specific container + - Available values for a key in specific container parameters: - name: resource_table in: path diff --git a/omero_search_engine/validation/psql_templates.py b/omero_search_engine/validation/psql_templates.py index 36168d97..fa86ecfe 100644 --- a/omero_search_engine/validation/psql_templates.py +++ b/omero_search_engine/validation/psql_templates.py @@ -181,7 +181,7 @@ def substitute(self, **kwargs): project_key_values = Template( """ -Select DISTINCT ( annotation_mapvalue.value) from image +Select DISTINCT (annotation_mapvalue.value) from image left join imageannotationlink on image.id =imageannotationlink.parent left join annotation_mapvalue on annotation_mapvalue.annotation_id=imageannotationlink.child diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index a56d1811..8fae7963 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -758,8 +758,8 @@ def get_no_images_sql_containers(): def check_container_keys_vakues(): - # the will be modified and the tesing data will be adjusted and provided - # at ruun time + # This will be modified and the testing data will be adjusted and provided + # at run time from omero_search_engine.validation.psql_templates import ( container_from_name, screen_key_values, From 9f575a187a2ee847eb9bc6f8781446b0b8b5583f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Feb 2023 03:39:56 +0000 Subject: [PATCH 11/28] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black: 22.12.0 → 23.1.0](https://github.com/psf/black/compare/22.12.0...23.1.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9159f50b..a8622a84 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ --- repos: - repo: https://github.com/psf/black - rev: 22.12.0 + rev: 23.1.0 hooks: - id: black args: [--target-version=py35] From a0504f4462b4fdd5cc3a0f83e2355e721ef287a7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Feb 2023 03:40:47 +0000 Subject: [PATCH 12/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- examples/search_with_bookmark_paging_using_submitquery.py | 1 - omero_search_engine/__init__.py | 1 - omero_search_engine/api/v1/resources/utils.py | 2 -- .../cache_functions/elasticsearch/transform_data.py | 2 +- 4 files changed, 1 insertion(+), 5 deletions(-) diff --git a/examples/search_with_bookmark_paging_using_submitquery.py b/examples/search_with_bookmark_paging_using_submitquery.py index 7cc432f5..95cb15bf 100644 --- a/examples/search_with_bookmark_paging_using_submitquery.py +++ b/examples/search_with_bookmark_paging_using_submitquery.py @@ -118,7 +118,6 @@ def call_omero_searchengine_return_results(url, data=None, method="post"): ) while len(received_results) < total_results: - page += 1 query_data_ = {"query_details": {"and_filters": and_filters}, "bookmark": bookmark} query_data_json_ = json.dumps(query_data_) diff --git a/omero_search_engine/__init__.py b/omero_search_engine/__init__.py index 77012d72..dcc7196a 100644 --- a/omero_search_engine/__init__.py +++ b/omero_search_engine/__init__.py @@ -127,7 +127,6 @@ def after_request(response): # added to let the user know the proper extension they should use @search_omero_app.errorhandler(404) def page_not_found(error): - search_omero_app.logger.info("Error: %s" % error) resp_message = ( "%s, You may use '/searchengine/api/v1/resources/' to test\ diff --git a/omero_search_engine/api/v1/resources/utils.py b/omero_search_engine/api/v1/resources/utils.py index 39dcb8f9..41d7d889 100644 --- a/omero_search_engine/api/v1/resources/utils.py +++ b/omero_search_engine/api/v1/resources/utils.py @@ -626,7 +626,6 @@ def elasticsearch_query_builder( should_part_list = should_part_list_ if len(should_part_list) > 0: - should_part_ = ",".join(should_part_list) should_part_ = should_term_template.substitute( should_term=should_part_, minimum_should_match=1 @@ -645,7 +644,6 @@ def elasticsearch_query_builder( all_terms = nested_must_part_ if len(nested_must_not_part) > 0: - nested_must_not_part_ = ",".join(nested_must_not_part) nested_must_not_part_ = must_not_term_template.substitute( must_not_term=nested_must_not_part_ diff --git a/omero_search_engine/cache_functions/elasticsearch/transform_data.py b/omero_search_engine/cache_functions/elasticsearch/transform_data.py index 7a3cff01..279a7d8e 100644 --- a/omero_search_engine/cache_functions/elasticsearch/transform_data.py +++ b/omero_search_engine/cache_functions/elasticsearch/transform_data.py @@ -338,7 +338,7 @@ def get_file_list(path_name): from os import walk f = [] - for (dirpath, dirnames, filenames) in walk(path_name): + for dirpath, dirnames, filenames in walk(path_name): f.extend(filenames) return f From 968b28c82da9c7a5e384a525019a52883bd89e99 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Wed, 1 Mar 2023 15:38:25 +0000 Subject: [PATCH 13/28] Fix typo in file name --- examples/{conatiner_key_values.py => container_key_values.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/{conatiner_key_values.py => container_key_values.py} (100%) diff --git a/examples/conatiner_key_values.py b/examples/container_key_values.py similarity index 100% rename from examples/conatiner_key_values.py rename to examples/container_key_values.py From 5d6b5d286a26bebdbe0e543a1cea5a10b84f65a5 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Wed, 17 May 2023 18:36:35 +0100 Subject: [PATCH 14/28] update developer documents --- docs/developer/developer.rst | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/developer/developer.rst b/docs/developer/developer.rst index 10c0e8f9..b1348675 100644 --- a/docs/developer/developer.rst +++ b/docs/developer/developer.rst @@ -1,2 +1,20 @@ Developer's documents -===================== \ No newline at end of file +===================== + +The developer should clone the code from the project repo using the following command:: + + git clone https://github.com/ome/omero_search_engine.git + +Then they need to create a Python virtual environment variable using either venv or conda and install the packages inside requirements.txt + +The developer needs to set up the application configuration as it is explained in the System configuration part inside "docs\configuration\configuration_installation.rst" + +After that, they should run the indexer to index Omero's data using the following command:: + + python manage.py get_index_data_from_database + +The developer can run the application using the following command:: + + python manage.py runserver -p 5577 + +Running the scripts inside the examples folder can be a good starting point. From c67e7198980eff6738008b52e8573e89b6fe7686 Mon Sep 17 00:00:00 2001 From: Jean-Marie Burel Date: Sat, 20 May 2023 06:25:40 +0100 Subject: [PATCH 15/28] add rtd config --- .readthedocs.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .readthedocs.yml diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..b38f4eed --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,23 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Build documentation with MkDocs +#mkdocs: +# configuration: mkdocs.yml + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + +# Optionally set the version of Python and requirements required to build your docs +build: + os: ubuntu-22.04 + tools: + python: "3.11" From 66d78f4a4c9fa1f870438f243d144790c0cab6bc Mon Sep 17 00:00:00 2001 From: Jean-Marie Burel Date: Sat, 20 May 2023 06:41:53 +0100 Subject: [PATCH 16/28] add doc badge --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index 9f11def3..c86caa36 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,10 @@ .. image:: https://github.com/ome/omero_search_engine/workflows/Build/badge.svg :target: https://github.com/ome/omero_search_engine/actions +.. image:: https://readthedocs.org/projects/omero-search-engine/badge/?version=latest + :target: https://omero-search-engine.readthedocs.io/en/latest/?badge=latest + :alt: Documentation Status + OMERO Search Engine -------------------- From be0b92418c5dc6258990817d8c1fe2fe493cd6dd Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 12 Jun 2023 12:32:01 +0100 Subject: [PATCH 17/28] update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eee6413..2caf794c 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +0.5.2 (June 2023): +------------------ +- Return all the available values for a key in a container [#77](https://github.com/ome/omero_search_engine/pull/77) +- Return the available attributes with a container + 0.5.1 (February 2023): ---------------------- - Fix the issue of filtering images by the container name [#81](https://github.com/ome/omero_search_engine/pull/81) From 68b0b7a9af35bb4bfb1170e5451aa2f5c626e0ea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 11:34:23 +0000 Subject: [PATCH 18/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- omero_search_engine/validation/results_validator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index 8fae7963..22f465ca 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -817,7 +817,6 @@ def check_container_keys_vakues(): ) if len(project_ids_results) > 0: for id in project_ids_results: - project_sql = project_key_values.substitute( id=id.get("id"), name=key ) From 0cd00377475a6a336b8eb697150fde4b57b832ed Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 12 Jun 2023 16:35:55 +0100 Subject: [PATCH 19/28] Mention the PR in the second features --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2caf794c..55f6b836 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ 0.5.2 (June 2023): ------------------ - Return all the available values for a key in a container [#77](https://github.com/ome/omero_search_engine/pull/77) -- Return the available attributes with a container +- Return the available attributes with a container [#77](https://github.com/ome/omero_search_engine/pull/77) 0.5.1 (February 2023): ---------------------- From ffe36d095563b38c12daf865c7cd55cdfa193cad Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Tue, 25 Jul 2023 18:02:03 +0100 Subject: [PATCH 20/28] Secure the connection with the elsticsearch --- configurations/app_config.yml | 2 ++ configurations/configuration.py | 13 +++++++++++++ manage.py | 16 ++++++++++++++++ omero_search_engine/__init__.py | 11 ++++++----- 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/configurations/app_config.yml b/configurations/app_config.yml index f888be60..084300da 100644 --- a/configurations/app_config.yml +++ b/configurations/app_config.yml @@ -12,3 +12,5 @@ PAGE_SIZE : 1000 CACHE_ROWS : 10000 MAX_RETUNED_ITEMS : 1700000 ELASTICSEARCH_BACKUP_FOLDER: "path/to/elasticsearch/backup/folder" +verify_certs: False +ELASTIC_PASSWORD: elasticsearch_user_password diff --git a/configurations/configuration.py b/configurations/configuration.py index 9d2dc099..900205ae 100644 --- a/configurations/configuration.py +++ b/configurations/configuration.py @@ -20,6 +20,7 @@ import yaml from shutil import copyfile import os +import json def load_configuration_variables_from_file(config): @@ -29,6 +30,18 @@ def load_configuration_variables_from_file(config): cofg = yaml.load(f) for x, y in cofg.items(): setattr(config, x, y) + if hasattr(config, "verify_certs"): + try: + verify_certs = json.load(config.verify_certs) + except: + verify_certs = False + else: + verify_certs=False + config.verify_certs=verify_certs + if not verify_certs: + import requests + from requests.packages.urllib3.exceptions import InsecureRequestWarning + requests.packages.urllib3.disable_warnings(InsecureRequestWarning) def set_database_connection_variables(config): diff --git a/manage.py b/manage.py index f13b9201..d7fdb504 100644 --- a/manage.py +++ b/manage.py @@ -191,6 +191,22 @@ def set_elasticsearch_configuration(elasticsearch_url=None): else: search_omero_app.logger.info("No attribute is provided") +@manager.command +@manager.option("-e", "--elasticsearch_password", help="set elasticsearch password") +def set_elasticsearch_password(elasticsearch_password=None): + if elasticsearch_password: + update_config_file({"ELASTIC_PASSWORD": elasticsearch_password}) + else: + search_omero_app.logger.info("No attribute is provided") + +@manager.command +@manager.option("-v", "--verify_certs", help="set elasticsearch password") +def set_verify_certs(verify_certs=None): + if verify_certs: + update_config_file({"verify_certs": verify_certs}) + else: + search_omero_app.logger.info("No attribute is provided") + @manager.command @manager.option("-c", "--cache_folder", help="cache folder path") diff --git a/omero_search_engine/__init__.py b/omero_search_engine/__init__.py index dcc7196a..3b88d3f9 100644 --- a/omero_search_engine/__init__.py +++ b/omero_search_engine/__init__.py @@ -22,7 +22,7 @@ import logging from elasticsearch import Elasticsearch from flasgger import Swagger, LazyString, LazyJSONEncoder - +import json from omero_search_engine.database.database_connector import DatabaseConnector from configurations.configuration import ( configLooader, @@ -54,12 +54,10 @@ "version": "0.2.0", } - swagger = Swagger(search_omero_app, template=template) app_config = load_configuration_variables_from_file(config_) - def create_app(config_name="development"): app_config = configLooader.get(config_name) load_configuration_variables_from_file(app_config) @@ -72,12 +70,17 @@ def create_app(config_name="development"): search_omero_app.app_context().push() search_omero_app.app_context() search_omero_app.app_context().push() + ELASTIC_PASSWORD = app_config.ELASTIC_PASSWORD + es_connector = Elasticsearch( app_config.ELASTICSEARCH_URL.split(","), + verify_certs=app_config.verify_certs, timeout=130, max_retries=20, retry_on_timeout=True, connections_per_node=10, + scheme="https", + http_auth=("elastic", ELASTIC_PASSWORD), ) search_omero_app.config["database_connector"] = database_connector @@ -103,7 +106,6 @@ def create_app(config_name="development"): search_omero_app.logger.info("app assistant startup") return search_omero_app - create_app() from omero_search_engine.api.v1.resources import ( # noqa @@ -114,7 +116,6 @@ def create_app(config_name="development"): resources_routers_blueprint_v1, url_prefix="/api/v1/resources" ) - # add it to account for CORS @search_omero_app.after_request def after_request(response): From 50cd783f3c96d35531c97e49ab058f918d72ad98 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 26 Jul 2023 11:36:44 +0000 Subject: [PATCH 21/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- configurations/configuration.py | 5 +++-- manage.py | 2 ++ omero_search_engine/__init__.py | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/configurations/configuration.py b/configurations/configuration.py index 900205ae..cee74068 100644 --- a/configurations/configuration.py +++ b/configurations/configuration.py @@ -36,11 +36,12 @@ def load_configuration_variables_from_file(config): except: verify_certs = False else: - verify_certs=False - config.verify_certs=verify_certs + verify_certs = False + config.verify_certs = verify_certs if not verify_certs: import requests from requests.packages.urllib3.exceptions import InsecureRequestWarning + requests.packages.urllib3.disable_warnings(InsecureRequestWarning) diff --git a/manage.py b/manage.py index d7fdb504..4f1874f9 100644 --- a/manage.py +++ b/manage.py @@ -191,6 +191,7 @@ def set_elasticsearch_configuration(elasticsearch_url=None): else: search_omero_app.logger.info("No attribute is provided") + @manager.command @manager.option("-e", "--elasticsearch_password", help="set elasticsearch password") def set_elasticsearch_password(elasticsearch_password=None): @@ -199,6 +200,7 @@ def set_elasticsearch_password(elasticsearch_password=None): else: search_omero_app.logger.info("No attribute is provided") + @manager.command @manager.option("-v", "--verify_certs", help="set elasticsearch password") def set_verify_certs(verify_certs=None): diff --git a/omero_search_engine/__init__.py b/omero_search_engine/__init__.py index 3b88d3f9..90fc6d86 100644 --- a/omero_search_engine/__init__.py +++ b/omero_search_engine/__init__.py @@ -58,6 +58,7 @@ app_config = load_configuration_variables_from_file(config_) + def create_app(config_name="development"): app_config = configLooader.get(config_name) load_configuration_variables_from_file(app_config) @@ -106,6 +107,7 @@ def create_app(config_name="development"): search_omero_app.logger.info("app assistant startup") return search_omero_app + create_app() from omero_search_engine.api.v1.resources import ( # noqa @@ -116,6 +118,7 @@ def create_app(config_name="development"): resources_routers_blueprint_v1, url_prefix="/api/v1/resources" ) + # add it to account for CORS @search_omero_app.after_request def after_request(response): From 6f6df0a94e4acf8a047a2c6a70966a77f391d8c1 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Wed, 26 Jul 2023 12:47:33 +0100 Subject: [PATCH 22/28] Fix pre commit checks --- configurations/configuration.py | 8 +++++--- omero_search_engine/__init__.py | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/configurations/configuration.py b/configurations/configuration.py index 900205ae..1742b249 100644 --- a/configurations/configuration.py +++ b/configurations/configuration.py @@ -33,14 +33,16 @@ def load_configuration_variables_from_file(config): if hasattr(config, "verify_certs"): try: verify_certs = json.load(config.verify_certs) - except: + except Exception as ex: + print(str(ex)) verify_certs = False else: - verify_certs=False - config.verify_certs=verify_certs + verify_certs = False + config.verify_certs = verify_certs if not verify_certs: import requests from requests.packages.urllib3.exceptions import InsecureRequestWarning + requests.packages.urllib3.disable_warnings(InsecureRequestWarning) diff --git a/omero_search_engine/__init__.py b/omero_search_engine/__init__.py index 3b88d3f9..adf38f99 100644 --- a/omero_search_engine/__init__.py +++ b/omero_search_engine/__init__.py @@ -22,7 +22,6 @@ import logging from elasticsearch import Elasticsearch from flasgger import Swagger, LazyString, LazyJSONEncoder -import json from omero_search_engine.database.database_connector import DatabaseConnector from configurations.configuration import ( configLooader, @@ -58,6 +57,7 @@ app_config = load_configuration_variables_from_file(config_) + def create_app(config_name="development"): app_config = configLooader.get(config_name) load_configuration_variables_from_file(app_config) @@ -106,6 +106,7 @@ def create_app(config_name="development"): search_omero_app.logger.info("app assistant startup") return search_omero_app + create_app() from omero_search_engine.api.v1.resources import ( # noqa @@ -116,6 +117,7 @@ def create_app(config_name="development"): resources_routers_blueprint_v1, url_prefix="/api/v1/resources" ) + # add it to account for CORS @search_omero_app.after_request def after_request(response): From a36a3a7a01dfa4f3720a1ea54ab893050f131bc5 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Wed, 26 Jul 2023 17:03:52 +0100 Subject: [PATCH 23/28] Update to elasticsearch 8.8.1 --- .github/workflows/main.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bba284d6..632ebb6f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -24,10 +24,10 @@ jobs: --health-retries 5 elasticsearch: - image: elasticsearch:7.16.2 + image: elasticsearch:8.8.1 ports: - 9200/tcp - options: -e="discovery.type=single-node" --health-cmd="curl http://localhost:9200/_cluster/health" --health-interval=10s --health-timeout=5s --health-retries=10 + options: -e="discovery.type=single-node" -e="ELASTIC_PASSWORD=elasticsearch_user_password" --health-cmd="curl http://localhost:9200/_cluster/health" --health-interval=10s --health-timeout=5s --health-retries=10 steps: - uses: actions/checkout@v2 @@ -84,5 +84,3 @@ jobs: file: deployment/docker/centos/Dockerfile push: true tags: ${{ join(fromJson(steps.gettags.outputs.tags)) }} - - From 08f554b4539e4675a46a876a7a7b0177e35f3617 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Wed, 26 Jul 2023 17:18:29 +0100 Subject: [PATCH 24/28] FIx action --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 632ebb6f..9f1977f6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -27,7 +27,7 @@ jobs: image: elasticsearch:8.8.1 ports: - 9200/tcp - options: -e="discovery.type=single-node" -e="ELASTIC_PASSWORD=elasticsearch_user_password" --health-cmd="curl http://localhost:9200/_cluster/health" --health-interval=10s --health-timeout=5s --health-retries=10 + options: -e="discovery.type=single-node" --es_api_basic_auth_username="elastic" --ELASTIC_PASSWORD="elasticsearch_user_password" --health-cmd="curl -k -u elastic:elasticsearch_user_password https://localhost:9200/_cluster/health" --health-interval=10s --health-timeout=5s --health-retries=10 steps: - uses: actions/checkout@v2 From c0e37ee81ce5fa8565fa97aec41be747747d08fe Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Wed, 26 Jul 2023 17:26:19 +0100 Subject: [PATCH 25/28] Set elastic search username and password --- .github/workflows/main.yml | 6 +++++- configurations/app_config.yml | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9f1977f6..e45791cb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -27,7 +27,11 @@ jobs: image: elasticsearch:8.8.1 ports: - 9200/tcp - options: -e="discovery.type=single-node" --es_api_basic_auth_username="elastic" --ELASTIC_PASSWORD="elasticsearch_user_password" --health-cmd="curl -k -u elastic:elasticsearch_user_password https://localhost:9200/_cluster/health" --health-interval=10s --health-timeout=5s --health-retries=10 + env: + es_api_basic_auth_username: "elastic" + ELASTIC_PASSWORD: "elasticsearch_user_password" + + options: -e="discovery.type=single-node" --health-cmd="curl -k -u elastic:elasticsearch_user_password https://localhost:9200/_cluster/health" --health-interval=10s --health-timeout=5s --health-retries=10 steps: - uses: actions/checkout@v2 diff --git a/configurations/app_config.yml b/configurations/app_config.yml index 084300da..4c1a2e21 100644 --- a/configurations/app_config.yml +++ b/configurations/app_config.yml @@ -6,7 +6,7 @@ DATABASE_NAME : "omero" CACHE_FOLDER : "path/to/folder/app_data" SECRET_KEY : "fsdasdh3424vvcsd467fgh" ASYNCHRONOUS_SEARCH : True -ELASTICSEARCH_URL : "http://localhost:9200" +ELASTICSEARCH_URL : "https://localhost:9200" IDR_TEST_FILE_URL : "https://raw.githubusercontent.com/IDR/idr.openmicroscopy.org/master/_data/studies.tsv" PAGE_SIZE : 1000 CACHE_ROWS : 10000 From 0643e0228be760f93c869c8b536591fd8167c5eb Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Tue, 5 Sep 2023 16:03:22 +0100 Subject: [PATCH 26/28] add instruction to set ELASTIC_PASSWORD --- docs/configuration/configuration_installation.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/configuration/configuration_installation.rst b/docs/configuration/configuration_installation.rst index f680367f..1e4ba43b 100644 --- a/docs/configuration/configuration_installation.rst +++ b/docs/configuration/configuration_installation.rst @@ -14,10 +14,12 @@ The application should have the access attributes (e.g, URL, username, password, * ``DATABASE_NAME`` * ``ELASTICSEARCH__URL`` * ``PAGE_SIZE`` + * ``ELASTIC_PASSWORD`` * Although the user can edit this file to set the values, there are some methods inside :omero_search_engine:`manage.py ` which could help to set the configuration e.g. * ``set_database_configuration`` * ``set_elasticsearch_configuration`` + * ``set_elasticsearch_password`` * When the app runs for the first time, it will look for the application configuration file. From 7d5a83799ee2547c14bad64e9d582814da450a32 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 25 Sep 2023 11:33:57 +0100 Subject: [PATCH 27/28] add tag 0.5.3 to changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55f6b836..ddd17779 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +0.5.3 (September 2023): +----------------------- +- Secure the connection with the elsticsearch [#92](https://github.com/ome/omero_search_engine/pull/92) + 0.5.2 (June 2023): ------------------ - Return all the available values for a key in a container [#77](https://github.com/ome/omero_search_engine/pull/77) From 77c8c74a5a22ce7e9c0caebb3db89d876e2b1018 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 25 Sep 2023 12:10:06 +0100 Subject: [PATCH 28/28] Update CHANGELOG.md Co-authored-by: jean-marie burel --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ddd17779..adf65a54 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ 0.5.3 (September 2023): ----------------------- -- Secure the connection with the elsticsearch [#92](https://github.com/ome/omero_search_engine/pull/92) +- Secure the connection with elasticsearch [#92](https://github.com/ome/omero_search_engine/pull/92) + 0.5.2 (June 2023): ------------------