diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 4d3bfaa5..e82dd7ea 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -48,6 +48,13 @@ jobs:
python manage.py set_database_configuration -u localhost -s ${{ job.services.postgres.ports[5432] }} -n postgress -p passwprd
# configure elasticsearch
python manage.py set_elasticsearch_configuration -e localhost:${{ job.services.elasticsearch.ports[9200] }}
+ # download and extract the database backup file
+ wget https://downloads.openmicroscopy.org/images/omero_db_searchengine.zip -P app_data
+ unzip app_data/omero_db_searchengine.zip -d app_data/
+ # run restore omero database
+ python manage.py restore_postgresql_database
+ # run indexing
+ python manage.py get_index_data_from_database -b False
# run tests
python -m unittest discover -s unit_tests
upload:
diff --git a/app_data/test_index_data.json b/app_data/test_index_data.json
index 658d43a4..d9ba7f9b 100644
--- a/app_data/test_index_data.json
+++ b/app_data/test_index_data.json
@@ -96,5 +96,27 @@
"validation screen"
]
]
- }
+ },
+ "query_in": {
+ "image": [
+ [
+ "Gene Symbol",
+ [
+ "Duoxa2",
+ "Bach2",
+ "Cxcr2",
+ "Mysm1"
+ ]
+ ],
+ [
+ "Organism",
+ [
+ "homo sapiens",
+ "mus musculus",
+ "mus musculus x mus spretus",
+ "human adenovirus 2"
+ ]
+ ]
+ ]
+ }
}
diff --git a/configurations/app_config.yml b/configurations/app_config.yml
index fb98ec2e..39f324ac 100644
--- a/configurations/app_config.yml
+++ b/configurations/app_config.yml
@@ -15,3 +15,4 @@ ELASTICSEARCH_BACKUP_FOLDER: "path/to/elasticsearch/backup/folder"
verify_certs: False
ELASTIC_PASSWORD: elasticsearch_user_password
STATEMENT_TIMEOUT: 5000
+BASE_FOLDER: /etc/searchengine/
diff --git a/examples/search.py b/examples/search.py
index fedd9fe6..df5b2a24 100644
--- a/examples/search.py
+++ b/examples/search.py
@@ -88,5 +88,9 @@ def call_omero_return_results(url, data=None, method="post"):
% (len(received_results), total_results, page, total_pages, bookmark)
)
-# 2000 /11686633, page: 1/11687, bookmark: 109600
-# 2000 /12225067, page: 1/12226, bookmark: 109600
+# Another example: using the 'in' operator and sending the items inside
+# value as a single string; the list items are separated by ','
+logging.info("Using in operator")
+url = "%s%s?key=Gene Symbol&value=Pdgfc,Rnase10&operator=in" % (base_url, image_search)
+bookmark, total_results, total_pages = call_omero_return_results(url, method="get")
+logging.info("%s,%s" % (total_results, total_pages))
diff --git a/examples/using_in_operator.py b/examples/using_in_operator.py
new file mode 100644
index 00000000..173222d1
--- /dev/null
+++ b/examples/using_in_operator.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2024 University of Dundee & Open Microscopy Environment.
+# All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+
+from utils import query_the_search_ending, logging
+
+# It is similar to using the 'in' operator in a SQL statement:
+# rather than having multiple 'or' conditions,
+# it uses a single condition.
+
+# The following example searches for the images which have any of the
+# 'Gene Symbol' values in this list ["Duoxa2", "Bach2", "Cxcr2", "Mysm1"]
+
+# The condition is sent as one entry of the query's "and_filters"
+
+logging.info("Example of using in operator")
+
+
+values_in = ["Duoxa2", "Bach2", "Cxcr2", "Mysm1"]
+logging.info("Searching for 'Gene Symbol' with values in [%s]" % (",".join(values_in)))
+and_filters = [{"name": "Gene Symbol", "value": values_in, "operator": "in"}]
+
+main_attributes = []
+query = {"and_filters": and_filters}
+# send the query; same result name as in using_not_in_operator.py
+received_results_data = query_the_search_ending(query, main_attributes)
diff --git a/examples/using_not_in_operator.py b/examples/using_not_in_operator.py
new file mode 100644
index 00000000..1765a051
--- /dev/null
+++ b/examples/using_not_in_operator.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2024 University of Dundee & Open Microscopy Environment.
+# All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+
+from utils import query_the_search_ending, logging
+
+# It is similar to using the 'not in' operator in a SQL statement:
+# rather than having multiple 'and' conditions with not_equals operators,
+# it uses a single condition.
+
+# The following example searches for the images whose 'Organism' value is
+# not any of the values in this list
+# ["homo sapiens","mus musculus","mus musculus x mus spretus","human adenovirus 2"]
+
+# The condition is sent as one entry of the query's "and_filters"
+
+logging.info("Example of using not_in operator")
+
+
+values_not_in = [
+    "homo sapiens",
+    "mus musculus",
+    "mus musculus x mus spretus",
+    "human adenovirus 2",
+]
+# the message says "not in" so that it matches the operator that is sent
+logging.info(
+    "Searching for 'Organism' with values not in [%s]" % (",".join(values_not_in))
+)
+and_filters = [{"name": "Organism", "value": values_not_in, "operator": "not_in"}]
+
+main_attributes = []
+query = {"and_filters": and_filters}
+# send the query and keep the returned data
+received_results_data = query_the_search_ending(query, main_attributes)
diff --git a/manage.py b/manage.py
index 085c26ae..22a77ba5 100644
--- a/manage.py
+++ b/manage.py
@@ -114,13 +114,25 @@ def sql_results_to_panda():
pass
+@manager.command
+def restore_postgresql_database():
+    """
+    Restore the PostgreSQL database by calling
+    omero_search_engine.database.utils.restore_database
+    """
+    from omero_search_engine.database import utils as database_utils
+
+    database_utils.restore_database()
+
+
@manager.command
@manager.option(
"-r",
"--resource",
help="resource name, creating all the indexes for all the resources is the default", # noqa
)
-def get_index_data_from_database(resource="all"):
+@manager.option(
+ "-b",
+ "--backup",
+ help="if True, backup will be called ", # noqa
+)
+def get_index_data_from_database(resource="all", backup="True"):
"""
insert data in Elasticsearch index for each resource
It gets the data from postgres database server
@@ -132,7 +144,9 @@ def get_index_data_from_database(resource="all"):
get_insert_data_to_index,
save_key_value_buckets,
)
+ import json
+ backup = json.loads(backup.lower())
if resource != "all":
sql_st = sqls_resources.get(resource)
if not sql_st:
@@ -148,7 +162,8 @@ def get_index_data_from_database(resource="all"):
test_indexing_search_query(deep_check=False, check_studies=True)
# backup the index data
- backup_elasticsearch_data()
+ if backup:
+ backup_elasticsearch_data()
# set configurations
@@ -360,6 +375,44 @@ def restore_elasticsearch_data():
restore_indices_data()
+@manager.command
+@manager.option("-s", "--screen_name", help="Screen name, or part of it")
+@manager.option("-p", "--project_name", help="Project name, or part of it")
+def data_validator(screen_name=None, project_name=None):
+    """
+    Check the key-value pairs for trailing and heading spaces.
+    It also checks for duplicated key-value pairs.
+    It can check all the projects and screens, or it can be limited to
+    a specific project or to a specific screen (not both at the same time).
+    The output is a collection of CSV files; each check usually generates
+    three files:
+    The main file contains image details (e.g. image id)
+    in addition to the key and the value.
+    One file for screens and one for projects.
+    Each file contains the screen name (project name),
+    the key-value pair which has the issue and the total number of affected
+    images for each row.
+    """
+    from datetime import datetime
+
+    if screen_name and project_name:
+        # the checks build their sql condition from a single name, so stop
+        # here instead of running them with both names set
+        print("Either screen name or project name is allowed")
+        return
+
+    from omero_search_engine.validation.omero_keyvalue_data_validator import (
+        check_for_heading_space,
+        check_for_trailing_space,
+        check_duplicated_keyvalue_pairs,
+    )
+
+    # a timestamp is taken before each check so the printed line shows how
+    # long every check took
+    start = datetime.now()
+    check_for_trailing_space(screen_name, project_name)
+    start1 = datetime.now()
+    check_for_heading_space(screen_name, project_name)
+    start2 = datetime.now()
+    check_duplicated_keyvalue_pairs(screen_name, project_name)
+    end = datetime.now()
+    print("start: %s, start1: %s, start2: %s, end: %s" % (start, start1, start2, end))
+
+
@manager.command
def test_container_key_value():
from omero_search_engine.validation.results_validator import (
@@ -370,4 +423,7 @@ def test_container_key_value():
if __name__ == "__main__":
+ from flask_script import Command
+
+ Command.capture_all_args = False
manager.run()
diff --git a/omero_search_engine/api/v1/resources/query_handler.py b/omero_search_engine/api/v1/resources/query_handler.py
index 822b6b7f..df0c6372 100644
--- a/omero_search_engine/api/v1/resources/query_handler.py
+++ b/omero_search_engine/api/v1/resources/query_handler.py
@@ -35,6 +35,9 @@
"screen": {"name": "name", "description": "description"},
}
+res_and_main_attributes = None
+res_or_main_attributes = None
+
def check_get_names(idr_, resource, attribute, return_exact=False):
# check the idr name and return the resource and possible values
@@ -107,8 +110,10 @@ def adjust_resource(self):
)
if len(ac_value) == 1:
self.value = ac_value[0]
- else:
+ elif len(ac_value) == 0:
self.value = -1
+ else:
+ self.value = ac_value
"""
pr_names = get_resource_names(self.resource)
if not self.value in pr_names:
@@ -337,6 +342,7 @@ def get_image_non_image_query(self):
def run_query(self, query_, resource):
main_attributes = {}
+
query = {"and_filters": [], "or_filters": []}
if query_.get("and_filters"):
@@ -398,6 +404,11 @@ def run_query(self, query_, resource):
# res = search_query(query, resource, bookmark,
# self.raw_elasticsearch_query,
# main_attributes,return_containers=self.return_containers)
+ global res_and_main_attributes, res_or_main_attributes
+ if res_and_main_attributes:
+ main_attributes["and_main_attributes"] = (
+ main_attributes.get("and_main_attributes") + res_and_main_attributes
+ )
if resource == "image" and self.return_containers:
res = search_query(
query,
@@ -633,6 +644,12 @@ def determine_search_results_(query_, return_columns=False, return_containers=Fa
and_filters = query_.get("query_details").get("and_filters")
or_filters = query_.get("query_details").get("or_filters")
and_query_groups = []
+ main_attributes = query_.get("main_attributes")
+ global res_and_main_attributes, res_or_main_attributes
+ if main_attributes:
+ res_and_main_attributes = main_attributes.get("and_main_attributes")
+ res_or_main_attributes = main_attributes.get("or_main_attributes")
+
columns_def = query_.get("columns_def")
or_query_groups = []
if and_filters and len(and_filters) > 0:
@@ -785,9 +802,9 @@ def add_local_schemas_to(resolver, schema_folder, base_uri, schema_ext=".json"):
def query_validator(query):
- query_schema_file = (
- "omero_search_engine/api/v1/resources/schemas/query_data.json" # noqa
- )
+ print("TRoz", query)
+ main_dir = os.path.abspath(os.path.dirname(__file__))
+ query_schema_file = os.path.join(main_dir, "schemas", "query_data.json")
base_uri = "file:" + abspath("") + "/"
with open(query_schema_file, "r") as schema_f:
query_schema = json.loads(schema_f.read())
diff --git a/omero_search_engine/api/v1/resources/schemas/filter_schema.json b/omero_search_engine/api/v1/resources/schemas/filter_schema.json
index 3f0df36a..611388ba 100644
--- a/omero_search_engine/api/v1/resources/schemas/filter_schema.json
+++ b/omero_search_engine/api/v1/resources/schemas/filter_schema.json
@@ -13,12 +13,12 @@
},
"value": {
"name":"value",
- "type": "string"
+ "type": ["array", "string"]
},
"operator": {
"name": "operator",
"type": "string",
- "enum": ["equals", "not_equals", "contains","not_contains"]
+ "enum": ["equals", "not_equals", "contains", "not_contains", "in", "not_in"]
}
,"resource": {
"name": "resource",
diff --git a/omero_search_engine/api/v1/resources/swagger_docs/search.yml b/omero_search_engine/api/v1/resources/swagger_docs/search.yml
index fdb70676..ad3c861c 100644
--- a/omero_search_engine/api/v1/resources/swagger_docs/search.yml
+++ b/omero_search_engine/api/v1/resources/swagger_docs/search.yml
@@ -28,7 +28,7 @@ parameters:
description: operator, default equals
in: query
type: string
- enum: ['equals', 'not_equals', 'contains', 'not_contains']
+ enum: ['equals', 'not_equals', 'contains', 'not_contains', 'in', 'not_in']
- name: case_sensitive
description: case sensitive query, default False
in: query
diff --git a/omero_search_engine/api/v1/resources/utils.py b/omero_search_engine/api/v1/resources/utils.py
index 41d7d889..2af31430 100644
--- a/omero_search_engine/api/v1/resources/utils.py
+++ b/omero_search_engine/api/v1/resources/utils.py
@@ -113,10 +113,31 @@ def get_resource_annotation_table(resource_table):
"""
{"match": {"key_values.value.keyvaluenormalize":"$value"}}"""
)
+
+# in operator
+case_insensitive_must_in_value_condition_template = Template(
+ """
+{"terms": {"key_values.value.keyvaluenormalize":$value}}"""
+)
+
case_sensitive_must_value_condition_template = Template(
"""
{"match": {"key_values.value.keyvalue":"$value"}}"""
)
+
+nested_query_template_must_must_not = Template(
+ """
+{"nested": {"path": "key_values",
+"query":{"bool": {"must":[$must_part], "must_not":[$must_not_part]}}}}"""
+)
+
+# in operator
+case_sensitive_must_in_value_condition_template = Template(
+ """
+{"terms": {"key_values.value.keyvalue":$value}}"""
+)
+
+
nested_keyvalue_pair_query_template = Template(
"""
{"nested": {"path": "key_values",
@@ -172,6 +193,8 @@ def get_resource_annotation_table(resource_table):
{"name.keyvalue": "$idr"}}}}]}}} """
)
+operators_required_list_data_type = ["in", "not_in"]
+
def build_error_message(error):
"""
@@ -291,8 +314,20 @@ def elasticsearch_query_builder(
search_omero_app.logger.info("FILTER %s" % filter)
try:
key = filter["name"].strip()
- value = filter["value"].strip()
operator = filter["operator"].strip()
+ if operator in operators_required_list_data_type:
+ if isinstance(filter["value"], list):
+ value_ = filter["value"]
+ else:
+ # when a single query provides the values as one string, the list
+ # items are expected to be separated by ','
+ value_ = filter["value"].split(",")
+ value = [val.strip() for val in value_]
+ value = json.dumps(value)
+
+ else:
+ value = filter["value"].strip()
+
except Exception as e:
search_omero_app.logger.info(str(e))
return build_error_message(
@@ -333,6 +368,61 @@ def elasticsearch_query_builder(
nested=",".join(_nested_must_part)
)
)
+
+ if operator == "in":
+ if case_sensitive:
+ _nested_must_part.append(
+ case_sensitive_must_in_value_condition_template.substitute( # noqa
+ value=value
+ )
+ )
+ _nested_must_part.append(
+ case_sensitive_must_name_condition_template.substitute(name=key)
+ ) # noqa
+
+ else:
+ _nested_must_part.append(
+ case_insensitive_must_in_value_condition_template.substitute( # noqa
+ value=value
+ )
+ )
+ _nested_must_part.append(
+ case_insensitive_must_name_condition_template.substitute( # noqa
+ name=key
+ )
+ )
+
+ nested_must_part.append(
+ nested_keyvalue_pair_query_template.substitute(
+ nested=",".join(_nested_must_part)
+ )
+ )
+
+ if operator == "not_in":
+ if case_sensitive:
+ nested_must_part.append(
+ nested_query_template_must_must_not.substitute(
+ must_not_part=case_sensitive_must_in_value_condition_template.substitute( # noqa
+ value=value
+ ),
+ must_part=case_sensitive_must_name_condition_template.substitute( # noqa
+ name=key
+ ),
+ )
+ )
+
+ else:
+ nested_must_part.append(
+ nested_query_template_must_must_not.substitute(
+ must_not_part=case_insensitive_must_in_value_condition_template.substitute( # noqa
+ value=value
+ ),
+ must_part=case_insensitive_must_name_condition_template.substitute( # noqa
+ name=key
+ ),
+ )
+ )
+
if operator == "contains":
value = "*{value}*".format(value=adjust_value(value))
# _nested_must_part.append(must_name_condition_template.substitute(name=key)) # noqa
@@ -371,64 +461,50 @@ def elasticsearch_query_builder(
value = "*{value}*".format(value=adjust_value(value))
if case_sensitive:
nested_must_part.append(
- nested_keyvalue_pair_query_template.substitute(
- nested=case_sensitive_must_name_condition_template.substitute( # noqa
- name=key
- )
- )
- )
- nested_must_not_part.append(
- nested_keyvalue_pair_query_template.substitute(
- nested=case_sensitive_wildcard_value_condition_template.substitute( # noqa
+ nested_query_template_must_must_not.substitute(
+ must_not_part=case_sensitive_wildcard_value_condition_template.substitute( # noqa
wild_card_value=value
- )
+ ),
+ must_part=case_sensitive_must_name_condition_template.substitute( # noqa
+ name=key
+ ),
)
)
+
else:
nested_must_part.append(
- nested_keyvalue_pair_query_template.substitute(
- nested=case_insensitive_must_name_condition_template.substitute( # noqa
- name=key
- )
- )
- )
- nested_must_not_part.append(
- nested_keyvalue_pair_query_template.substitute(
- nested=case_insensitive_wildcard_value_condition_template.substitute( # noqa
+ nested_query_template_must_must_not.substitute(
+ must_not_part=case_insensitive_wildcard_value_condition_template.substitute( # noqa
wild_card_value=value
- )
+ ),
+ must_part=case_insensitive_must_name_condition_template.substitute( # noqa
+ name=key
+ ),
)
)
else:
if case_sensitive:
nested_must_part.append(
- nested_keyvalue_pair_query_template.substitute(
- nested=case_sensitive_must_name_condition_template.substitute( # noqa
- name=key
- )
- )
- )
- nested_must_not_part.append(
- nested_keyvalue_pair_query_template.substitute(
- nested=case_sensitive_must_value_condition_template.substitute( # noqa
+ nested_query_template_must_must_not.substitute(
+ must_not_part=case_sensitive_must_value_condition_template.substitute( # noqa
value=value
- )
+ ),
+ must_part=case_sensitive_must_name_condition_template.substitute( # noqa
+ name=key
+ ),
)
)
+
else:
nested_must_part.append(
- nested_keyvalue_pair_query_template.substitute(
- nested=case_insensitive_must_name_condition_template.substitute( # noqa
- name=key
- )
- )
- )
- nested_must_not_part.append(
- nested_keyvalue_pair_query_template.substitute(
- nested=case_insensitive_must_value_condition_template.substitute( # noqa
+ nested_query_template_must_must_not.substitute(
+ must_not_part=case_insensitive_must_value_condition_template.substitute( # noqa
value=value
- )
+ ),
+ must_part=case_insensitive_must_name_condition_template.substitute( # noqa
+ name=key
+ ),
)
)
@@ -618,7 +694,6 @@ def elasticsearch_query_builder(
ff = nested_query_template_must_not.substitute(must_not_value=ss)
should_part_list_or.append(ff)
all_terms = ""
-
for should_part_list_ in all_should_part_list:
if isinstance(should_part_list_, dict):
should_part_list = should_part_list_.get("main")
diff --git a/omero_search_engine/cache_functions/elasticsearch/transform_data.py b/omero_search_engine/cache_functions/elasticsearch/transform_data.py
index 49fad1a8..8376b1f5 100644
--- a/omero_search_engine/cache_functions/elasticsearch/transform_data.py
+++ b/omero_search_engine/cache_functions/elasticsearch/transform_data.py
@@ -36,7 +36,7 @@
)
from omero_search_engine.validation.psql_templates import (
query_images_in_project_id,
- query_images_screen_id,
+ query_images_in_screen_id,
)
from app_data.data_attrs import annotation_resource_link
@@ -691,7 +691,7 @@ def save_key_value_buckets(
if resource_table == "project":
sql_n = query_images_in_project_id.substitute(project_id=id)
elif resource_table == "screen":
- sql_n = query_images_screen_id.substitute(screen_id=id)
+ sql_n = query_images_in_screen_id.substitute(screen_id=id)
no_images_co = conn.execute_query(
sql_n, statement_timeout=statement_timeout
)
diff --git a/omero_search_engine/database/utils.py b/omero_search_engine/database/utils.py
new file mode 100644
index 00000000..8f6e58da
--- /dev/null
+++ b/omero_search_engine/database/utils.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2024 University of Dundee & Open Microscopy Environment.
+# All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import sys
+import subprocess
+
+
+def restore_database():
+    """
+    Restore the database from the dump file app_data/omero.pgdump.
+
+    The file is loaded with psql, using the DATABASE_USER,
+    DATABASE_SERVER_URI, DATABASE_PORT and DATABASE_NAME values of the
+    application configuration. DATABASE_PASSWORD is handed to psql through
+    the PGPASSWORD environment variable.
+    Failures are printed and are not raised.
+    """
+    from omero_search_engine import search_omero_app
+
+    config = search_omero_app.config
+    # this module lives in omero_search_engine/database, so the folder which
+    # contains app_data is two levels above it
+    main_dir = os.path.abspath(os.path.dirname(__file__))
+    mm = os.path.dirname(os.path.dirname(main_dir))
+    if mm not in sys.path:
+        sys.path.append(mm)
+    dat_file_name = os.path.join(mm, "app_data", "omero.pgdump")
+    # an argument list (no shell) keeps configuration values and paths which
+    # contain spaces or shell characters intact
+    restore_command = [
+        "psql",
+        "--username",
+        str(config.get("DATABASE_USER")),
+        "--host",
+        str(config.get("DATABASE_SERVER_URI")),
+        "--port",
+        str(config.get("DATABASE_PORT")),
+        "-d",
+        str(config.get("DATABASE_NAME")),
+        "-f",
+        dat_file_name,
+    ]
+    # extend the current environment instead of replacing it, otherwise
+    # PATH and the other inherited variables are lost for psql
+    env = dict(os.environ)
+    password = config.get("DATABASE_PASSWORD")
+    if password is not None:
+        env["PGPASSWORD"] = str(password)
+    try:
+        completed = subprocess.run(restore_command, env=env)
+    except OSError as e:
+        # e.g. psql is not installed or is not executable
+        print("Exception happened during restore %s" % (e))
+        return
+    if completed.returncode != 0:
+        print("psql exited with code %s during restore" % completed.returncode)
diff --git a/omero_search_engine/validation/omero_keyvalue_data_validator.py b/omero_search_engine/validation/omero_keyvalue_data_validator.py
new file mode 100644
index 00000000..d9d203d2
--- /dev/null
+++ b/omero_search_engine/validation/omero_keyvalue_data_validator.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2024 University of Dundee & Open Microscopy Environment.
+# All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from omero_search_engine import search_omero_app
+from omero_search_engine.validation.psql_templates import (
+ trail_space_query,
+ head_space_query,
+ duplicated_keyvalue_pairs_query,
+)
+import os
+import pandas as pd
+
+conn = search_omero_app.config["database_connector"]
+
+
+def prepare_the_sql_statement(sql_template, screen_name, project_name, add_where=""):
+    """
+    Customize the sql statement by filling the $condition of the template.
+
+    sql_template: a template which has a $condition placeholder
+    screen_name: screen name, or part of it; may be empty
+    project_name: project name, or part of it; may be empty
+    add_where: the text which precedes the condition (" and" or "where"
+    in the callers of this module)
+
+    It returns the sql statement; the condition is empty when no name is
+    provided. It raises ValueError when both names are provided, as the
+    condition is built from a single name.
+    """
+    if screen_name and project_name:
+        raise ValueError("Either screen name or project name is allowed")
+    if project_name:
+        column, name = "project.name", project_name
+    elif screen_name:
+        column, name = "screen.name", screen_name
+    else:
+        return sql_template.substitute(condition="")
+    # double the single quotes so that a name such as O'Neil stays inside
+    # the sql string literal
+    name = str(name).replace("'", "''")
+    return sql_template.substitute(
+        condition=" {add_where} {column} like '%{name}%'".format(
+            add_where=add_where, column=column, name=name
+        )
+    )
+
+
+def check_for_trailing_space(screen_name, project_name):
+    """
+    Run trail_space_query, limited to the given screen or project when a
+    name is provided, and generate the "trailing_space" reports when the
+    query returns any rows.
+    """
+    logger = search_omero_app.logger
+    logger.info("Checking for trailing space ...")
+    statement = prepare_the_sql_statement(
+        trail_space_query, screen_name, project_name, add_where=" and"
+    )
+    rows = conn.execute_query(statement)
+    if len(rows) > 0:
+        logger.info("Generate for trailing space ...")
+        generate_reports(rows, "trailing_space", screen_name, project_name)
+    else:
+        logger.info("No results is available for trailing space")
+
+
+def check_for_heading_space(screen_name, project_name):
+    """
+    Run head_space_query, limited to the given screen or project when a
+    name is provided, and generate the "heading_space" reports when the
+    query returns any rows.
+    """
+    logger = search_omero_app.logger
+    logger.info("Checking for heading space ...")
+    statement = prepare_the_sql_statement(
+        head_space_query, screen_name, project_name, add_where=" and"
+    )
+    rows = conn.execute_query(statement)
+    if len(rows) > 0:
+        logger.info("Generate for head space ...")
+        generate_reports(rows, "heading_space", screen_name, project_name)
+    else:
+        logger.info("No results available for heading space")
+
+
+def check_duplicated_keyvalue_pairs(screen_name, project_name):
+    """
+    Run duplicated_keyvalue_pairs_query, limited to the given screen or
+    project when a name is provided, and generate the
+    "duplicated_keyvalue_pairs" reports when the query returns any rows.
+    """
+    logger = search_omero_app.logger
+    logger.info("Checking for duplicated key-value pairs...")
+    statement = prepare_the_sql_statement(
+        duplicated_keyvalue_pairs_query, screen_name, project_name, add_where="where"
+    )
+    rows = conn.execute_query(statement)
+    if len(rows) > 0:
+        logger.info("Generate reports for duplicated key-value pairs...")
+        generate_reports(rows, "duplicated_keyvalue_pairs", screen_name, project_name)
+    else:
+        logger.info("No results available for duplicated key-value pairs")
+
+
+def generate_reports(results, check_type, screen_name, project_name):
+    """
+    Generate the output CSV files contents and save them.
+
+    all_<check_type>.csv contains every row of results.
+    screens_<check_type>.csv and projects_<check_type>.csv contain the number
+    of rows for each (screen or project name, key, value).
+    The files are saved in the configured BASE_FOLDER; the user's home
+    folder is used when BASE_FOLDER is not set or is not an existing folder.
+    """
+    df = pd.DataFrame(results)
+    base_folder = search_omero_app.config.get("BASE_FOLDER")
+    # os.path.isdir raises TypeError for None, so a missing setting is
+    # tested first
+    if not base_folder or not os.path.isdir(base_folder):
+        base_folder = os.path.expanduser("~")
+
+    _write_csv(df, os.path.join(base_folder, "all_%s.csv" % check_type))
+
+    # the screens report is skipped only when the check is limited to a
+    # project, and the projects report only when it is limited to a screen
+    if screen_name or not project_name:
+        _write_grouped_report(
+            df, "screen_name", os.path.join(base_folder, "screens_%s.csv" % check_type)
+        )
+    if project_name or not screen_name:
+        _write_grouped_report(
+            df,
+            "project_name",
+            os.path.join(base_folder, "projects_%s.csv" % check_type),
+        )
+
+
+def _write_csv(df, file_name):
+    """
+    Save the data frame as a CSV file
+    """
+    with open(file_name, "w") as text_file:
+        text_file.write(df.to_csv())
+
+
+def _write_grouped_report(df, container_column, file_name):
+    """
+    Count the rows for each (container name, key, value), save the counts
+    as a CSV file and log their column sums
+    """
+    counts = (
+        df.groupby([container_column, "name", "value"])
+        .size()
+        .reset_index()
+        .rename(columns={0: "number of images"})
+    )
+    _write_csv(counts, file_name)
+    search_omero_app.logger.info(counts.sum())
diff --git a/omero_search_engine/validation/psql_templates.py b/omero_search_engine/validation/psql_templates.py
index fa86ecfe..6209705d 100644
--- a/omero_search_engine/validation/psql_templates.py
+++ b/omero_search_engine/validation/psql_templates.py
@@ -41,6 +41,39 @@ def substitute(self, **kwargs):
return super(SqlSearchEngineTemplate, self).substitute(kwargs)
+# get available values for an image key
+query_images_available_values_for_key = Template(
+ """
+Select DISTINCT lower(annotation_mapvalue.value) from image
+inner join imageannotationlink on image.id =imageannotationlink.parent
+inner join annotation_mapvalue on
+annotation_mapvalue.annotation_id=imageannotationlink.child
+where lower(annotation_mapvalue.name)='$name' """
+)
+
+# get the image keys and values whose value contains a given text
+query_images_any_value = Template(
+ """
+Select DISTINCT lower(annotation_mapvalue.name),
+lower(annotation_mapvalue.value) from image
+inner join imageannotationlink on image.id =imageannotationlink.parent
+inner join annotation_mapvalue on
+annotation_mapvalue.annotation_id=imageannotationlink.child
+where lower(annotation_mapvalue.value) like '%$val_part%' """
+)
+
+# get the images whose value for a key satisfies $operator against '%$value%'
+query_images_contains_not_contains = Template(
+ """
+Select DISTINCT image.id from image
+inner join imageannotationlink on image.id =imageannotationlink.parent
+inner join annotation_mapvalue on
+annotation_mapvalue.annotation_id=imageannotationlink.child
+where lower(annotation_mapvalue.name)='$name'
+and lower(annotation_mapvalue.value) $operator ('%$value%') """
+)
+
+
# get images satisfy image key-value query
query_images_key_value = Template(
"""
@@ -49,7 +82,7 @@ def substitute(self, **kwargs):
inner join annotation_mapvalue on
annotation_mapvalue.annotation_id=imageannotationlink.child
where lower(annotation_mapvalue.name)='$name' and
-lower(annotation_mapvalue.value)=lower('$value')"""
+lower(annotation_mapvalue.value)$operator lower('$value')"""
)
# Get number of images which satisfy project key-value query
@@ -65,11 +98,11 @@ def substitute(self, **kwargs):
inner join annotation_mapvalue
on annotation_mapvalue.annotation_id=projectannotationlink.child
where lower(annotation_mapvalue.name)=lower('$name')
-and lower(annotation_mapvalue.value)=lower('$value')"""
+and lower(annotation_mapvalue.value) $operator lower('$value')"""
)
# Get the number of images using "in"
-query_image_or = Template(
+query_image_in = Template(
"""
Select DISTINCT image.id from image
inner join imageannotationlink
@@ -77,7 +110,7 @@ def substitute(self, **kwargs):
inner join annotation_mapvalue
on annotation_mapvalue.annotation_id=imageannotationlink.child
where lower(annotation_mapvalue.name) in ($names)
-and lower(annotation_mapvalue.value) in ($values)"""
+and lower(annotation_mapvalue.value) $operator ($values)"""
)
# Get the images which satisfy screen key-value query
@@ -93,8 +126,8 @@ def substitute(self, **kwargs):
on screen.id =screenannotationlink.parent
inner join annotation_mapvalue
on annotation_mapvalue.annotation_id=screenannotationlink.child
-where lower(annotation_mapvalue.name)='$name'
-and lower(annotation_mapvalue.value)=lower('$value')"""
+where lower(annotation_mapvalue.name)= lower('$name')
+and lower(annotation_mapvalue.value)$operator lower('$value')"""
)
@@ -117,11 +150,11 @@ def substitute(self, **kwargs):
inner join dataset on datasetimagelink.parent=dataset.id
inner join projectdatasetlink on dataset.id=projectdatasetlink.child
inner join project on project.id=projectdatasetlink.parent
-where lower(project.name)=lower('$name')"""
+where lower (project.name) $operator lower ('$name')"""
)
# get images in a screen using id
-query_images_screen_id = Template(
+query_images_in_screen_id = Template(
"""
Select DISTINCT image.id from image
inner join wellsample on wellsample.image=image.id
@@ -141,7 +174,7 @@ def substitute(self, **kwargs):
inner join plate on well.plate=plate.id
inner join screenplatelink on plate.id=screenplatelink.child
inner join screen on screen.id=screenplatelink.parent
-where lower(screen.name)=lower('$name')"""
+where lower(screen.name)$operator lower('$name')"""
)
# get resource id using its name
@@ -179,6 +212,70 @@ def substitute(self, **kwargs):
and lower(annotation_mapvalue.value) =lower('$value')"""
)
+# Get the image key-value pairs whose value starts with a space, together
+# with the names of the project and the screen which the image is linked to
+# (left joins, so either name may be null).
+# $condition is appended to the where clause by the caller.
+head_space_query = Template(
+ """
+select image.id as image_id, screen.name as screen_name, project.name as project_name,
+ annotation_mapvalue.name, annotation_mapvalue.value from image
+ inner join imageannotationlink on image.id =imageannotationlink.parent
+ inner join annotation_mapvalue on
+ annotation_mapvalue.annotation_id=imageannotationlink.child
+ left join datasetimagelink on datasetimagelink.child=image.id
+ left join dataset on datasetimagelink.parent=dataset.id
+ left join projectdatasetlink on dataset.id=projectdatasetlink.child
+ left join project on project.id=projectdatasetlink.parent
+ left join wellsample on wellsample.image=image.id
+ left join well on wellsample.well= well.id left join plate on well.plate=plate.id
+ left join screenplatelink on plate.id=screenplatelink.child
+ left join screen on screen.id=screenplatelink.parent
+ where annotation_mapvalue.value like ' %' $condition
+ group by project_name, screen_name,image.id,
+ annotation_mapvalue.name, annotation_mapvalue.value
+"""
+)
+
+# Get the image key-value pairs whose value ends with a space, together
+# with the names of the project and the screen which the image is linked to
+# (left joins, so either name may be null).
+# $condition is appended to the where clause by the caller.
+trail_space_query = Template(
+ """
+select image.id as image_id, screen.name as screen_name, project.name as project_name,
+ annotation_mapvalue.name, annotation_mapvalue.value from image
+ inner join imageannotationlink on image.id =imageannotationlink.parent
+ inner join annotation_mapvalue on
+ annotation_mapvalue.annotation_id=imageannotationlink.child
+ left join datasetimagelink on datasetimagelink.child=image.id
+ left join dataset on datasetimagelink.parent=dataset.id
+ left join projectdatasetlink on dataset.id=projectdatasetlink.child
+ left join project on project.id=projectdatasetlink.parent
+ left join wellsample on wellsample.image=image.id
+ left join well on wellsample.well= well.id
+ left join plate on well.plate=plate.id
+ left join screenplatelink on plate.id=screenplatelink.child
+ left join screen on screen.id=screenplatelink.parent
+ where annotation_mapvalue.value like '% ' $condition
+ group by project_name, screen_name,image.id, annotation_mapvalue.name,
+ annotation_mapvalue.value
+"""
+)
+
+# Get the key-value pairs which are returned more than once for the same
+# image, project name and screen name (HAVING COUNT(*)>1), with their count.
+# $condition sits where the where clause would be - presumably the caller
+# passes a complete "where ..." clause or an empty string; verify against caller.
+duplicated_keyvalue_pairs_query = Template(
+ """
+Select image.id as image_id, project.name as project_name, screen.name as screen_name,
+ annotation_mapvalue.name, annotation_mapvalue.value, count (*) from image
+ left join datasetimagelink on datasetimagelink.child=image.id
+ left join dataset on datasetimagelink.parent=dataset.id
+ left join projectdatasetlink on dataset.id=projectdatasetlink.child
+ left join project on project.id=projectdatasetlink.parent
+ left join wellsample on wellsample.image=image.id
+ left join well on wellsample.well= well.id left join plate on well.plate=plate.id
+ left join screenplatelink on plate.id=screenplatelink.child
+ left join screen on screen.id=screenplatelink.parent
+ inner join imageannotationlink on image.id =imageannotationlink.parent
+ inner join annotation_mapvalue on
+ annotation_mapvalue.annotation_id=imageannotationlink.child
+ $condition
+ group by project_name, screen_name,image.id, annotation_mapvalue.name,
+ annotation_mapvalue.value HAVING COUNT(*)>1
+ """
+)
+
project_key_values = Template(
"""
Select DISTINCT (annotation_mapvalue.value) from image
diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py
index f21a1325..fda5a205 100644
--- a/omero_search_engine/validation/results_validator.py
+++ b/omero_search_engine/validation/results_validator.py
@@ -18,21 +18,33 @@
# along with this program. If not, see .
from omero_search_engine import search_omero_app
+import json
from datetime import datetime
from omero_search_engine.api.v1.resources.query_handler import (
determine_search_results_,
query_validator,
simple_search,
)
+
+from omero_search_engine.api.v1.resources.resource_analyser import (
+ search_value_for_resource,
+ get_key_values_return_contents,
+)
+
from omero_search_engine.validation.psql_templates import (
query_images_key_value,
query_image_project_meta_data,
query_images_screen_key_value,
query_images_in_project_name,
query_images_screen_name,
- query_image_or,
+ query_image_in,
screens_count,
projects_count,
+ query_images_available_values_for_key,
+ query_images_any_value,
+ query_images_contains_not_contains,
+ query_images_in_project_id,
+ query_images_in_screen_id,
)
import os
@@ -42,9 +54,14 @@
"screen": query_images_screen_key_value,
"project_name": query_images_in_project_name,
"screen_name": query_images_screen_name,
- "query_image_or": query_image_or,
+ "query_image_or": query_image_in,
+ "in_clause": query_image_in,
+ "not_in_clause": query_image_in,
"screens_count": screens_count,
"projects_count": projects_count,
+ "available_values_for_key": query_images_available_values_for_key,
+ "search_any_value": query_images_any_value,
+ "image_contains_not_contains": query_images_contains_not_contains,
}
@@ -54,8 +71,9 @@ class Validator(object):
and from the searchengine
"""
- def __init__(self, deep_check):
+ def __init__(self, deep_check=False):
self.deep_check = deep_check
+ self.identical = True
def set_simple_query(self, resource, name, value, type="keyvalue"):
"""
@@ -69,6 +87,30 @@ def set_simple_query(self, resource, name, value, type="keyvalue"):
self.sql_statement = query_methods[resource]
self.searchengine_results = {}
+ def set_contains_not_contains_query(self, resource, name, value, type="keyvalue"):
+ """
+ Set up a contains/not contains query: store the resource, the key
+ name, the value part and the query type, and reset the stored results.
+ The sql template is always the "image_contains_not_contains" one,
+ whatever the resource is.
+ """
+ self.resource = resource
+ self.type = type
+ self.name = name
+ self.value = value
+ self.postgres_results = []
+ self.sql_statement = query_methods["image_contains_not_contains"]
+ self.searchengine_results = {}
+
+ def set_owner_group(self, owner_id=None, group_id=None):
+ """
+ Set the owner id and/or the group id which get_results_db and
+ get_results_searchengine add as extra filters when they are set
+ """
+ self.owner_id = owner_id
+ self.group_id = group_id
+
+ def set_in_query(self, clauses, resource="image", type="in_clause"):
+ """
+ Set up an in list query.
+ clauses: a pair of [key name, list of values]
+ type: "in_clause" or "not_in_clause"
+ """
+ self.type = type
+ self.clauses = clauses
+ self.resource = resource
+
def set_complex_query(self, name, clauses, resource="image", type="complex"):
"""
complex query
@@ -81,6 +123,27 @@ def set_complex_query(self, name, clauses, resource="image", type="complex"):
self.postgres_results = []
self.searchengine_results = {}
+    def get_in_sql(self, clauses, name="in_clause"):
+        """
+        Run the "in" or "not in" key-value query on the database and
+        return the ids of the returned images.
+
+        clauses: a pair of [key name, list of values]; the key name and
+        the values are lower-cased before they are put in the sql statement
+        name: "in_clause" (default) or "not_in_clause", it selects both the
+        sql template in query_methods and the sql operator
+
+        Raises ValueError if name is not one of the supported clause types.
+        """
+        operators = {"in_clause": "in", "not_in_clause": "not in"}
+        if name not in operators:
+            # without this check an unknown name ends in an unbound "sql" variable
+            raise ValueError("Unsupported in clause type: %s" % name)
+        operator = operators[name]
+        names = "'%s'" % clauses[0].lower()
+        cases = [c.lower() for c in clauses[1]]
+        values = "'" + "','".join(cases) + "'"
+        sql = query_methods[name].substitute(
+            names=names, values=values, operator=operator
+        )
+        conn = search_omero_app.config["database_connector"]
+        postgres_results = conn.execute_query(sql)
+        results = [item["id"] for item in postgres_results]
+        search_omero_app.logger.info(
+            "results for '%s' received %s" % (operator, len(results))
+        )
+        return results
+
def get_or_sql(self, clauses, name="query_image_or"):
names = ""
values = ""
@@ -91,14 +154,15 @@ def get_or_sql(self, clauses, name="query_image_or"):
else:
names = "'%s'" % claus[0].lower()
values = "'%s'" % claus[1].lower()
- sql = query_methods[name].substitute(names=names, values=values)
+ # sql = query_methods[name].substitute(names=names, values=values)
+ sql = query_methods[name].substitute(names=names, values=values, operator="in")
conn = search_omero_app.config["database_connector"]
statement_timeout = search_omero_app.config["STATEMENT_TIMEOUT"]
postgres_results = conn.execute_query(sql, statement_timeout=statement_timeout)
results = [item["id"] for item in postgres_results]
search_omero_app.logger.info(
- "results for or received %s" % len(results)
+ "results for 'or' received %s" % len(results)
) # noqa
return results
@@ -107,7 +171,10 @@ def get_and_sql(self, clauses):
co = 0
for claus in clauses:
sql = query_methods["image"].substitute(
- name=claus[0].lower(), value=claus[1].lower()
+ # toz
+ operator="=",
+ name=claus[0].lower(),
+ value=claus[1].lower(),
)
conn = search_omero_app.config["database_connector"]
statement_timeout = search_omero_app.config["STATEMENT_TIMEOUT"]
@@ -115,9 +182,7 @@ def get_and_sql(self, clauses):
sql, statement_timeout=statement_timeout
)
res = [item["id"] for item in postgres_results]
- search_omero_app.logger.info(
- "results for and received recived %s" % len(res)
- )
+ search_omero_app.logger.info("results for 'and' received %s" % len(res))
if co == 0:
results = res
else:
@@ -125,12 +190,30 @@ def get_and_sql(self, clauses):
co += 1
return results
- def get_results_postgres(self):
+ def get_results_db(self, operator=None):
"""
Query the postgresql
"""
search_omero_app.logger.info("Getting results from postgres")
- if self.type == "complex":
+ if self.type == "buckets":
+ if self.name:
+ sql = query_methods["available_values_for_key"].substitute(
+ name=self.name
+ )
+ conn = search_omero_app.config["database_connector"]
+ self.postgres_results = conn.execute_query(sql)
+ elif self.value:
+ sql = query_methods["search_any_value"].substitute(val_part=self.value)
+ conn = search_omero_app.config["database_connector"]
+ self.postgres_results = conn.execute_query(sql)
+ return
+ if self.type == "in_clause":
+ self.postgres_results = self.get_in_sql(self.clauses)
+ return
+ elif self.type == "not_in_clause":
+ self.postgres_results = self.get_in_sql(self.clauses, self.type)
+ return
+ elif self.type == "complex":
if self.name == "query_image_or":
self.postgres_results = self.get_or_sql(self.clauses)
elif self.name == "query_image_and":
@@ -146,12 +229,30 @@ def get_results_postgres(self):
)
return
else:
+ if not operator or operator == "equals":
+ operator = "="
+ elif operator == "not_equals":
+ operator = "!="
+ elif operator == "contains":
+ operator = "like"
+ elif operator == "not_contains":
+ operator = "not like"
+
if self.name != "name":
sql = self.sql_statement.substitute(
- name=self.name.lower(), value=self.value.lower()
+ # toz
+ operator=operator,
+ name=self.name.lower(),
+ value=self.value.lower(),
)
else:
- sql = self.sql_statement.substitute(name=self.value)
+ sql = self.sql_statement.substitute(name=self.value, operator=operator)
+
+ if hasattr(self, "owner_id") and self.owner_id:
+ sql = sql + " and %s.owner_id=%s" % (self.resource, self.owner_id)
+ if hasattr(self, "group_id") and self.group_id:
+ sql = sql + " and %s.group_id=%s" % (self.resource, self.group_id)
+ print(sql)
# search_omero_app.logger.info ("sql: %s"%sql)
conn = search_omero_app.config["database_connector"]
statement_timeout = search_omero_app.config["STATEMENT_TIMEOUT"]
@@ -161,11 +262,45 @@ def get_results_postgres(self):
"results received %s" % len(self.postgres_results)
) # noqa
- def get_results_searchengine(self):
+ def get_results_searchengine(self, operator=None):
"""
Query the results from the serachengine
"""
- if self.type == "complex":
+ if self.type == "buckets":
+ if self.name:
+ res = get_key_values_return_contents(self.name, "image", False)
+ self.searchengine_results = json.loads(res.data)
+ elif self.value:
+ self.searchengine_results = search_value_for_resource(
+ "image", self.value
+ )
+ return
+
+ if self.type == "in_clause":
+ filters = []
+ filters.append(
+ {
+ "name": self.clauses[0],
+ "value": self.clauses[1],
+ "operator": "in",
+ "resource": self.resource,
+ }
+ )
+ query = {"and_filters": filters, "or_filters": []}
+
+ elif self.type == "not_in_clause":
+ filters = []
+ filters.append(
+ {
+ "name": self.clauses[0],
+ "value": self.clauses[1],
+ "operator": "not_in",
+ "resource": self.resource,
+ }
+ )
+ query = {"and_filters": filters, "or_filters": []}
+
+ elif self.type == "complex":
filters = []
if self.name != "query_image_and_or":
for claus in self.clauses:
@@ -200,14 +335,15 @@ def get_results_searchengine(self):
"resource": self.resource,
}
)
-
else:
+ if not operator:
+ operator = "equals"
if self.name != "name":
and_filters = [
{
"name": self.name.lower(),
"value": self.value.lower(),
- "operator": "equals",
+ "operator": operator,
"resource": self.resource,
}
]
@@ -217,11 +353,21 @@ def get_results_searchengine(self):
"name": "name",
"value": self.value,
"resource": "project",
- "operator": "equals",
+ "operator": operator,
}
]
query = {"and_filters": and_filters, "or_filters": []}
- query_data = {"query_details": query}
+ and_main_attributes = []
+ if hasattr(self, "owner_id") and self.owner_id:
+ and_main_attributes.append(
+ {"name": "owner_id", "value": self.owner_id, "operator": "equals"}
+ )
+ if hasattr(self, "group_id") and self.group_id:
+ and_main_attributes.append(
+ {"name": "group_id", "value": self.group_id, "operator": "equals"}
+ )
+ main_attributes = {"and_main_attributes": and_main_attributes}
+ query_data = {"query_details": query, "main_attributes": main_attributes}
# validate the query syntex
query_validation_res = query_validator(query_data)
if query_validation_res == "OK":
@@ -334,7 +480,10 @@ def get_containers_test_cases(self):
None,
return_containers=True,
)
- if search_engine_results["results"].get("results"):
+ # print(search_engine_results["results"])
+ if search_engine_results.get("results") and search_engine_results[
+ "results"
+ ].get("results"):
for item in search_engine_results["results"].get("results"):
if item["type"] == "screen":
if item["name"] in screens_results_idr:
@@ -392,42 +541,43 @@ def get_containers_test_cases(self):
search_omero_app.logger.info(mes)
return mess
- def compare_results(self):
+ def compare_results(self, operator=None):
"""
- call the results
+ Get and compare the results between the database and the searchengine
"""
st_time = datetime.now()
- self.get_results_postgres()
+ self.get_results_db(operator)
st2_time = datetime.now()
- self.get_results_searchengine()
+ self.get_results_searchengine(operator)
st3_time = datetime.now()
sql_time = st2_time - st_time
searchengine_time = st3_time - st2_time
+ # nothing more to compare for "buckets" queries; the type name must match
+ # the one checked in get_results_db and get_results_searchengine
+ if self.type == "buckets":
+ return
if len(self.postgres_results) == self.searchengine_results.get("size"):
- ids_in = True
is_it_repated = []
serach_ids = [id for id in self.searchengine_results.get("ids")]
serach_idsp = [id for id in self.searchengine_results.get("idsp")]
if self.deep_check:
if sorted(serach_ids) != sorted(self.postgres_results):
- ids_in = False
+ self.identical = False
if sorted(serach_idsp) != sorted(serach_ids):
- ids_in = False
+ self.identical = False
else:
if sorted(serach_idsp) != sorted(serach_ids):
- ids_in = False
+ self.identical = False
else:
for id in serach_ids:
if id in is_it_repated:
- ids_in = False
+ self.identical = False
break
else:
is_it_repated.append(id)
if id not in self.postgres_results:
- ids_in = False
+ self.identical = False
break
- if ids_in:
+ if self.identical:
search_omero_app.logger.info(
"No of the retuned results are similar ..."
)
@@ -440,32 +590,12 @@ def compare_results(self):
searchengine_no = self.searchengine_results.get("size")
else:
searchengine_no = self.searchengine_results
- if not self.deep_check:
- return (
- "not equal, database no of the results from server is: %s and\
- the number of results from searchengine (bookmark) is %s?,\
- \ndatabase server query time= %s, searchengine query time= %s"
- % (
- len(self.postgres_results),
- searchengine_no,
- sql_time,
- searchengine_time,
- )
- )
- else:
- return (
- "not equal, database no of the results from server is: %s and\
- the number of results from searchengine (bookmark) is %s?,\
- the number of results from searchengine (pagination) is %s?,\
- \ndatabase server query time= %s, searchengine query time= %s"
- % (
- len(self.postgres_results),
- searchengine_no,
- len(serach_idsp),
- sql_time,
- searchengine_time,
- )
- )
+ # the adjacent literals are concatenated as they are, so each one
+ # carries its own separating space
+ return (
+ "not equal, the number of results from the database server is: %s and"
+ " the number of results from searchengine is %s?,"
+ "\ndatabase server query time= %s, searchengine query time= %s"
+ % (len(self.postgres_results), searchengine_no, sql_time, searchengine_time)
+ )
def validate_queries(json_file, deep_check):
@@ -483,6 +613,7 @@ def validate_queries(json_file, deep_check):
test_cases = test_data.get("test_cases")
complex_test_cases = test_data.get("complex_test_cases")
+ query_in = test_data.get("query_in")
messages = []
from datetime import datetime
@@ -492,23 +623,40 @@ def validate_queries(json_file, deep_check):
name = case[0]
value = case[1]
search_omero_app.logger.info(
- "Testing %s for name: %s, key: %s" % (resource, name, value)
+ "Testing (equals) %s for name: %s, key: %s" % (resource, name, value)
)
validator = Validator(deep_check)
validator.set_simple_query(resource, name, value)
if resource == "image":
mess = validator.get_containers_test_cases()
messages = messages + mess
-
- res = validator.compare_results()
+ res = validator.compare_results("equals")
elabsed_time = str(datetime.now() - start_time)
messages.append(
- "Results from PostgreSQL and search engine for "
- "name '%s', value '%s', are: %s"
+ # the adjacent literals are concatenated as they are,
+ # so the first one ends with the separating space
+ "Results (equals) from the database and search engine "
+ "for name: %s , value: %s are: %s"
% (validator.name, validator.value, res)
)
search_omero_app.logger.info("Total time=%s" % elabsed_time)
+ # Not equals
+ start_time = datetime.now()
+ search_omero_app.logger.info(
+ "Testing (not equals) %s for name: %s, key: %s"
+ % (resource, name, value)
+ )
+ if resource == "image":
+ not_equals_validator = Validator(deep_check)
+ not_equals_validator.set_simple_query(resource, name, value)
+ res = not_equals_validator.compare_results("not_equals")
+ elabsed_time = str(datetime.now() - start_time)
+ messages.append(
+ # the adjacent literals are concatenated as they are,
+ # so the first one ends with the separating space
+ "Results (not_equals) from PostgreSQL and search engine "
+ "for name: %s , value: %s are: %s"
+ % (not_equals_validator.name, not_equals_validator.value, res)
+ )
+ search_omero_app.logger.info("Total time=%s" % elabsed_time)
+
for name, cases_ in complex_test_cases.items():
for cases in cases_:
start_time = datetime.now()
@@ -523,6 +671,42 @@ def validate_queries(json_file, deep_check):
search_omero_app.logger.info(
"Total time=%s" % str(datetime.now() - start_time)
)
+
+    # Validate the "in" and "not in" queries.
+    # The "query_in" section is optional, so test files which do not
+    # have it can still be validated.
+    for resource, cases in (query_in or {}).items():
+        for case in cases:
+            start_time = datetime.now()
+            validator_in = Validator(deep_check)
+            validator_in.set_in_query(case, resource)
+            res = validator_in.compare_results()
+            messages.append(
+                "Results for 'in' from the database and search engine "
+                "for %s name: %s and value in [%s] are %s"
+                % (
+                    validator_in.resource,
+                    validator_in.clauses[0],
+                    ",".join(validator_in.clauses[1]),
+                    res,
+                )
+            )
+            end_in = datetime.now()
+            search_omero_app.logger.info("Total time=%s" % str(end_in - start_time))
+            # test the same but change the operator to not in
+            validator_not_in = Validator(deep_check)
+            validator_not_in.set_in_query(case, resource, type="not_in_clause")
+            res = validator_not_in.compare_results()
+            messages.append(
+                "Results for 'not in' from the database and search engine for %s name: "
+                "%s and value in [%s] are %s"
+                % (
+                    validator_not_in.resource,
+                    validator_not_in.clauses[0],
+                    ",".join(validator_not_in.clauses[1]),
+                    res,
+                )
+            )
+            search_omero_app.logger.info("Total time=%s" % str(datetime.now() - end_in))
+
search_omero_app.logger.info(
"############################################## Check Report ##############################################" # noqa
)
@@ -535,7 +719,7 @@ def validate_queries(json_file, deep_check):
"###########################################################################################################" # noqa
)
# save the check report to a text file
- base_folder = "/etc/searchengine/"
+ base_folder = search_omero_app.config.get("BASE_FOLDER")
if not os.path.isdir(base_folder):
base_folder = os.path.expanduser("~")
@@ -550,6 +734,8 @@ def validate_queries(json_file, deep_check):
def test_no_images():
idr_url = search_omero_app.config.get("IDR_TEST_FILE_URL")
+ if not idr_url:
+ return
if not idr_url:
search_omero_app.logger.info("No idr test file is found")
@@ -566,7 +752,6 @@ def test_no_images():
headers = lines[0]
headers = headers.split("\t")
- print(len(headers))
for i in range(len(headers) - 1):
print(i, headers[i])
names = {}
@@ -580,7 +765,7 @@ def test_no_images():
names[name] = int(study[9])
results = {}
- base_folder = "/etc/searchengine/"
+ base_folder = search_omero_app.config.get("BASE_FOLDER")
if not os.path.isdir(base_folder):
base_folder = os.path.expanduser("~")
@@ -643,7 +828,7 @@ def test_no_images():
def get_omero_stats():
values = ["Attribute", "No. buckets", "Total number", "Resource"]
- base_folder = "/etc/searchengine/"
+ base_folder = search_omero_app.config.get("BASE_FOLDER")
if not os.path.isdir(base_folder):
base_folder = os.path.expanduser("~")
stats_file = os.path.join(base_folder, "stats.csv")
@@ -692,12 +877,107 @@ def get_omero_stats():
f.write(report)
-def get_no_images_sql_containers():
+def check_number_images_sql_containers_using_ids():
"""
This method tests the number of images inside each container
(project or screen) in the searchengine index data
and compare them with the number of images inside
-each container in the postgresql database server
+    each container in the database server.
+    As container name is not unique, container id is used
+    to determine the number of images.
+
+    Returns True when the two numbers are equal for every checked
+    container, otherwise False.
+    """
+    from omero_search_engine.api.v1.resources.urls import (
+        get_resource_names,
+    )
+    from omero_search_engine.api.v1.resources.utils import (
+        search_resource_annotation,
+    )
+
+    all_equal = True
+
+    conn = search_omero_app.config["database_connector"]
+    all_names = get_resource_names("all")
+    for resource in all_names:
+        search_omero_app.logger.info(
+            "######################## Checking %s ########################\n" % resource
+        )
+        if resource not in ("project", "screen"):
+            # there is an sql template for projects and screens only
+            search_omero_app.logger.info(
+                "Skipping %s, it is not a supported container" % resource
+            )
+            continue
+        for res_name_ in all_names.get(resource):
+            res_name = res_name_.get("name")
+            res_id = res_name_.get("id")
+            search_omero_app.logger.info(
+                "Checking %s name: %s, id: %s" % (resource, res_name, res_id)
+            )
+            main_attributes = {
+                "and_main_attributes": [
+                    {
+                        "name": "%s_id" % resource,
+                        "value": res_id,
+                        "operator": "equals",
+                        "resource": "image",
+                    }
+                ]
+            }
+            query = {"and_filters": [], "or_filters": []}
+            query_data = {"query_details": query, "main_attributes": main_attributes}
+
+            returned_results = search_resource_annotation("image", query_data)
+            # always computed from this container's response, so a missing
+            # "results" or "size" gives 0 instead of the previous container's value
+            searchengine_results = (returned_results.get("results") or {}).get(
+                "size"
+            ) or 0
+            search_omero_app.logger.info(
+                "Number of images returned from searchengine: %s" % searchengine_results
+            )
+            if resource == "project":
+                sql = query_images_in_project_id.substitute(project_id=res_id)
+            else:
+                sql = query_images_in_screen_id.substitute(screen_id=res_id)
+            results = conn.execute_query(sql)
+            postgres_results = len(results)
+            search_omero_app.logger.info(
+                "Number of images returned from the database: %s" % postgres_results
+            )
+            if searchengine_results != postgres_results:
+                if res_name == "idr0021" and res_id == 872:
+                    # known issue with two images of this project (ids 9539
+                    # and 9552): they belong to two different datasets
+                    continue
+                all_equal = False
+                if searchengine_results > 0:
+                    # report the database image ids which are not in the
+                    # results returned from the searchengine
+                    indexed_ids = {
+                        res.get("id") for res in returned_results["results"]["results"]
+                    }
+                    for row in results:
+                        if row["id"] not in indexed_ids:
+                            search_omero_app.logger.info(
+                                "Image id %s is not in the searchengine results"
+                                % row["id"]
+                            )
+                search_omero_app.logger.info("ERROR: Not equal results")
+                search_omero_app.logger.info(
+                    "Error checking %s name: %s, id: %s" % (resource, res_name, res_id)
+                )
+            else:
+                search_omero_app.logger.info("equal results")
+            search_omero_app.logger.info(
+                "\n-----------------------------------------------------------------------------\n"  # noqa
+            )
+    return all_equal
+
+
+def get_no_images_sql_containers(write_report=True):
+ """
+ This method tests the number of images inside each container
+ (project or screen) in the searchengine index data
+ and compare them with the number of images inside
+ each container in the database server
"""
from omero_search_engine.api.v1.resources.urls import (
get_resource_names,
@@ -742,10 +1022,13 @@ def get_no_images_sql_containers():
)
search_omero_app.logger.info(message2)
messages.append(message2)
- sql = query_methods["%s_name" % resource].substitute(name=res_name)
+ # the name templates take the comparison operator as a parameter
+ sql = query_methods["%s_name" % resource].substitute(
+ name=res_name, operator="="
+ )
results = conn.execute_query(sql, statement_timeout=statement_timeout)
postgres_results = len(results)
- message3 = "No of images returned from postgresql: %s" % seachengine_results
+ message3 = (
+ "Number of images returned from the database: %s" % postgres_results
+ )
messages.append(message3)
search_omero_app.logger.info(message3)
if seachengine_results != postgres_results:
@@ -759,14 +1042,26 @@ def get_no_images_sql_containers():
messages.append(
"\n-----------------------------------------------------------------------------\n" # noqa
)
- base_folder = "/etc/searchengine/"
- if not os.path.isdir(base_folder):
- base_folder = os.path.expanduser("~")
+ if write_report:
+ base_folder = search_omero_app.config.get("BASE_FOLDER")
+ if not os.path.isdir(base_folder):
+ base_folder = os.path.expanduser("~")
+ report_file = os.path.join(base_folder, "check_containers_report.txt")
+ report = "\n".join(messages) # noqa
+ with open(report_file, "w") as f:
+ f.write(report)
- report_file = os.path.join(base_folder, "check_containers_report.txt")
- report = "\n".join(messages) # noqa
- with open(report_file, "w") as f:
- f.write(report)
+
+"""
+def set_ownership(resource , name, value, owner_id=None, group_id=None):
+ if hasattr(self, 'owener_id'):
+ if hasattr(self, 'group_id'):
+ sql=query_images_key_value.substitute(name=name, value=value)
+ if owner_id:
+ sql=sql +" %s.%owner_id=%s"%(resource,owner_id)
+ if group_id:
+ sql = sql + " %s.%group_id=%s" % (resource, group_id)
+"""
def check_container_keys_vakues():
@@ -821,7 +1116,7 @@ def check_container_keys_vakues():
else:
scr_searchengine_results = scr_searchengine_results.response
search_omero_app.logger.info(
- "Results from PostgreSQL database: %s" % len(screen_results)
+ "Results from the database: %s" % len(screen_results)
)
if len(scr_searchengine_results) > 0 and scr_searchengine_results[
0
@@ -853,7 +1148,7 @@ def check_container_keys_vakues():
pr_searchengine_results = pr_searchengine_results.response
search_omero_app.logger.info(
- "Results from PostgreSQL database: %s" % len(project_results)
+ "Results from the database: %s" % len(project_results)
)
if len(pr_searchengine_results) > 0 and pr_searchengine_results[
0
diff --git a/unit_tests/__init__.py b/unit_tests/__init__.py
new file mode 100644
index 00000000..51828773
--- /dev/null
+++ b/unit_tests/__init__.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2024 University of Dundee & Open Microscopy Environment.
+# All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
diff --git a/unit_tests/test_app.py b/unit_tests/test_app.py
index 154912ec..6659861b 100644
--- a/unit_tests/test_app.py
+++ b/unit_tests/test_app.py
@@ -34,6 +34,10 @@
key_values_resource_cache_template,
)
+from omero_search_engine.validation.results_validator import (
+ Validator,
+ check_number_images_sql_containers_using_ids,
+)
from omero_search_engine.cache_functions.elasticsearch.transform_data import (
delete_es_index,
create_index,
@@ -46,11 +50,24 @@
not_valid_and_filters,
not_valid_or_filters,
query,
+ query_image_and,
+ query_image_or,
+ query_image_and_or,
+ simple_queries,
+ query_in,
+ images_keys,
+ images_value_parts,
+ contains_not_contains_queries,
+ image_owner,
+ image_group,
+ image_owner_group,
)
from omero_search_engine import search_omero_app, create_app
create_app("testing")
+# deep_check should be a configuration item
+deep_check = True
class BasicTestCase(unittest.TestCase):
@@ -134,8 +151,6 @@ def test_add_submit_query_delete_es_index(self):
es_index_2 = "key_values_resource_cach"
create_es_index_2 = True
all_all_indices = get_all_indexes_from_elasticsearch()
- print(all_all_indices)
- print(all_all_indices.keys())
if es_index_2 in all_all_indices.keys():
create_es_index_2 = False
@@ -146,12 +161,189 @@ def test_add_submit_query_delete_es_index(self):
create_index(es_index_2, key_values_resource_cache_template)
)
res = search_resource_annotation(table, query)
- print(res)
assert len(res.get("results")) >= 0
self.assertTrue(delete_es_index(es_index))
if create_es_index_2:
self.assertTrue(delete_es_index(es_index_2))
+ def test_single_query(self):
+ """
+ For each case in simple_queries, run the equals query and then the
+ not_equals query on the database and on the search engine, and check
+ that the number of the database results is the same as the size
+ which is reported by the search engine
+ """
+ for resource, cases in simple_queries.items():
+ for case in cases:
+ name = case[0]
+ value = case[1]
+ validator = Validator(deep_check)
+ validator.set_simple_query(resource, name, value)
+ validator.get_results_db("equals")
+ validator.get_results_searchengine("equals")
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+ validator.get_results_db("not_equals")
+ validator.get_results_searchengine("not_equals")
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+ # NOTE(review): identical is set to True in Validator.__init__ and, as
+ # far as this patch shows, is only changed inside compare_results,
+ # which is not called here - confirm that this assertion can fail
+ self.assertTrue(validator.identical)
+
+ def test_and_query(self):
+ """
+ Compare the database results and the search engine results
+ for the "query_image_and" test cases
+ """
+ name = "query_image_and"
+ for cases in query_image_and:
+ validator = Validator(deep_check)
+ validator.set_complex_query(name, cases)
+ validator.compare_results()
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+ self.assertTrue(validator.identical)
+
+ def test_or_query(self):
+ """
+ Compare the database results and the search engine results
+ for the "query_image_or" test cases
+ """
+ name = "query_image_or"
+ for cases in query_image_or:
+ validator = Validator(deep_check)
+ validator.set_complex_query(name, cases)
+ validator.compare_results()
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+ self.assertTrue(validator.identical)
+
+ def test_no_images_containers(self):
+ """
+ Check the result of check_number_images_sql_containers_using_ids,
+ which compares the number of images inside each container
+ in the search engine and in the database
+ """
+ self.assertTrue(check_number_images_sql_containers_using_ids())
+
+ def test_multi_or_quries(self):
+ # placeholder: the test has no body yet, so it always passes
+ pass
+
+ def test_complex_query(self):
+ """
+ Compare the database results and the search engine results
+ for the "query_image_and_or" test cases
+ """
+ name = "query_image_and_or"
+ for cases in query_image_and_or:
+ validator = Validator(deep_check)
+ validator.set_complex_query(name, cases)
+ validator.compare_results()
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+ self.assertTrue(validator.identical)
+
+ def test_in_query(self):
+ """
+ Compare the database results and the search engine results
+ for the "in" queries which are defined in query_in
+ """
+ for resource, cases in query_in.items():
+ for case in cases:
+ validator = Validator(deep_check)
+ validator.set_in_query(case, resource)
+ validator.compare_results()
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+ self.assertTrue(validator.identical)
+
+ def test_not_in_query(self):
+ """
+ Compare the database results and the search engine results
+ for the query_in cases when the "not in" operator is used
+ """
+ for resource, cases in query_in.items():
+ for case in cases:
+ validator = Validator(deep_check)
+ validator.set_in_query(case, resource, type="not_in_clause")
+ validator.compare_results()
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+ self.assertTrue(validator.identical)
+
+ def test_seach_for_any_value(self):
+ """
+ For each value part in images_value_parts, check that the number of
+ rows returned from the database is the same as the
+ "total_number_of_buckets" returned from the search engine
+ """
+ for part in images_value_parts:
+ validator = Validator(deep_check)
+ # name is None, so the "buckets" query is a search by value part
+ validator.set_simple_query("image", None, part, type="buckets")
+ validator.compare_results()
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("total_number_of_buckets"),
+ )
+
+ def test_available_values_for_key(self):
+ """
+ For each key in images_keys, check that the number of rows returned
+ from the database is the same as the "total_number_of_buckets"
+ returned from the search engine
+ """
+ for image_key in images_keys:
+ validator = Validator(deep_check)
+ # value is None, so the "buckets" query gets the values of the key
+ validator.set_simple_query("image", image_key, None, type="buckets")
+ validator.compare_results()
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("total_number_of_buckets"),
+ )
+
+ def test_contains_not_contains_queries(self):
+ """
+ For each case in contains_not_contains_queries, run the contains
+ query and then the not_contains query on the database and on the
+ search engine, and check that the number of the database results
+ is the same as the size which is reported by the search engine
+ """
+ for resource, cases in contains_not_contains_queries.items():
+ for case in cases:
+ name = case[0]
+ value = case[1]
+ validator = Validator(deep_check)
+ validator.set_contains_not_contains_query(resource, name, value)
+ validator.get_results_db("contains")
+ validator.get_results_searchengine("contains")
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+ validator.get_results_db("not_contains")
+ validator.get_results_searchengine("not_contains")
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+ # NOTE(review): identical is set to True in Validator.__init__ and, as
+ # far as this patch shows, is only changed inside compare_results,
+ # which is not called here - confirm that this assertion can fail
+ self.assertTrue(validator.identical)
+
+ def test_owner(self):
+ """
+ Compare the number of results from the database and from the search
+ engine for the image_owner cases; each case is [name, value, owner id]
+ """
+ for resource, cases in image_owner.items():
+ for case in cases:
+ name = case[0]
+ value = case[1]
+ owner_id = case[2]
+ validator = Validator(deep_check)
+ validator.set_simple_query(resource, name, value)
+ validator.set_owner_group(owner_id=owner_id)
+ validator.compare_results()
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+
+ def test_group(self):
+ """
+ Compare the number of results from the database and from the search
+ engine for the image_group cases; each case is [name, value, group id]
+ """
+ for resource, cases in image_group.items():
+ for case in cases:
+ name = case[0]
+ value = case[1]
+ group_id = case[2]
+ validator = Validator(deep_check)
+ validator.set_simple_query(resource, name, value)
+ validator.set_owner_group(group_id=group_id)
+ validator.compare_results()
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+
+ def test_owner_group(self):
+ """
+ Compare the number of results from the database and from the search
+ engine for the image_owner_group cases; each case is
+ [name, value, owner id, group id]
+ """
+ for resource, cases in image_owner_group.items():
+ for case in cases:
+ name = case[0]
+ value = case[1]
+ owner_id = case[2]
+ group_id = case[3]
+ validator = Validator(deep_check)
+ validator.set_simple_query(resource, name, value)
+ validator.set_owner_group(owner_id=owner_id, group_id=group_id)
+ validator.compare_results()
+ self.assertEqual(
+ len(validator.postgres_results),
+ validator.searchengine_results.get("size"),
+ )
+
# def test_add_delete_es_index(self):
# '''
# test create index in elastic search
diff --git a/unit_tests/test_data.py b/unit_tests/test_data.py
index f081bf94..0a791446 100644
--- a/unit_tests/test_data.py
+++ b/unit_tests/test_data.py
@@ -52,3 +52,54 @@
# "operator": "equals", "resource": "image"}]}}
query = {"query_details": {"and_filters": []}}
+
+query_image_and = [
+ [["Phenotype Annotation Level", "protein"], ["organism", "homo sapiens"]]
+]
+
+query_image_or = [[["Gene Symbol", "CDK5RAP2"], ["Gene Symbol", "cep120"]]]
+
+query_image_and_or = [
+ {
+ "query_image_and": [
+ ["Organism", "homo sapiens"],
+ ["Targeted Protein", "CDK5RAP2"],
+ ["Phenotype Term Accession", "CMPO_0000425"],
+ ],
+ "query_image_or": [
+ ["Phenotype", "protein localized to centrosome"],
+ ["Gene Symbol", "http://www.ebi.ac.uk/cmpo/CMPO_0000425"],
+ ],
+ }
+]
+
+simple_queries = {
+ "image": [
+ ["cell line", "Hela"],
+ ["PBS", "10Mm"],
+ ["Gene Symbol", "CDK5RAP2"],
+ ["organism", "homo sapiens"],
+ ["temperature", "37"],
+ ]
+}
+
+contains_not_contains_queries = {
+ "image": [["cell line", "hel"], ["gene symbol", "cep"]]
+}
+
+query_in = {
+ "image": [
+ ["Gene Symbol", ["pcnt", "cenpj", "cep120", "cdk5rap2"]],
+ ["temperature", ["23 c", "37 c"]],
+ ]
+}
+
+image_owner = {"image": [["cell line", "Hela", 103]]}
+
+image_group = {"image": [["cell line", "Hela", 5]]}
+
+image_owner_group = {"image": [["gene symbol", "cep120", 702, 5]]}
+
+images_keys = ["cell line", "gene symbol"]
+
+images_value_parts = ["he", "pr"]