Skip to content

Commit

Permalink
Merge branch 'main' into statement_timeout
Browse files Browse the repository at this point in the history
  • Loading branch information
khaledk2 authored Jul 25, 2024
2 parents e572692 + 547bead commit 6b2469b
Show file tree
Hide file tree
Showing 19 changed files with 1,257 additions and 147 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ jobs:
python manage.py set_database_configuration -u localhost -s ${{ job.services.postgres.ports[5432] }} -n postgress -p passwprd
# configure elasticsearch
python manage.py set_elasticsearch_configuration -e localhost:${{ job.services.elasticsearch.ports[9200] }}
# download and extract the database backup file
wget https://downloads.openmicroscopy.org/images/omero_db_searchengine.zip -P app_data
unzip app_data/omero_db_searchengine.zip -d app_data/
# run restore omero database
python manage.py restore_postgresql_database
# run indexing
python manage.py get_index_data_from_database -b False
# run tests
python -m unittest discover -s unit_tests
upload:
Expand Down
24 changes: 23 additions & 1 deletion app_data/test_index_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -96,5 +96,27 @@
"validation screen"
]
]
}
},
"query_in": {
"image": [
[
"Gene Symbol",
[
"Duoxa2",
"Bach2",
"Cxcr2",
"Mysm1"
]
],
[
"Organism",
[
"homo sapiens",
"mus musculus",
"mus musculus x mus spretus",
"human adenovirus 2"
]
]
]
}
}
1 change: 1 addition & 0 deletions configurations/app_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ ELASTICSEARCH_BACKUP_FOLDER: "path/to/elasticsearch/backup/folder"
verify_certs: False
ELASTIC_PASSWORD: elasticsearch_user_password
STATEMENT_TIMEOUT: 5000
BASE_FOLDER: /etc/searchengine/
8 changes: 6 additions & 2 deletions examples/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,9 @@ def call_omero_return_results(url, data=None, method="post"):
% (len(received_results), total_results, page, total_pages, bookmark)
)

# 2000 /11686633, page: 1/11687, bookmark: 109600
# 2000 /12225067, page: 1/12226, bookmark: 109600
# Another example: use the 'in' operator and send the items as a single
# string inside value; the list items are separated by ','
logging.info("Using in operator")
url = "%s%s?key=Gene Symbol&value=Pdgfc,Rnase10&operator=in" % (base_url, image_search)
bookmark, total_results, total_pages = call_omero_return_results(url, method="get")
logging.info("%s,%s" % (total_results, total_pages))
42 changes: 42 additions & 0 deletions examples/using_in_operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2024 University of Dundee & Open Microscopy Environment.
# All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


from utils import query_the_search_ending, logging

# The 'in' operator is similar to the 'in' operator in a sql statement:
# rather than having multiple 'or' conditions,
# it only uses a single condition.

# The following example searches for the images which have any of the
# 'Gene Symbol' values in this list ["Duoxa2", "Bach2", "Cxcr2", "Mysm1"]

logging.info("Example of using in operator")


values_in = ["Duoxa2", "Bach2", "Cxcr2", "Mysm1"]
logging.info("Searching for 'Gene Symbol' with values in [%s]" % (",".join(values_in)))

# and filters: a single filter whose value is the whole list of accepted values
and_filters = [{"name": "Gene Symbol", "value": values_in, "operator": "in"}]

main_attributes = []
query = {"and_filters": and_filters}

# run the query through the helper imported from utils and keep what it returns
received_results_data = query_the_search_ending(query, main_attributes)
48 changes: 48 additions & 0 deletions examples/using_not_in_operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2024 University of Dundee & Open Microscopy Environment.
# All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


from utils import query_the_search_ending, logging

# The 'not_in' operator is similar to the 'not in' operator in a sql statement:
# rather than having multiple conditions with not_equals operators,
# it only uses a single condition.

# The following example searches for the images whose 'Organism' value
# is not any of the values in this list
# ["homo sapiens","mus musculus","mus musculus x mus spretus","human adenovirus 2"]

logging.info("Example of using not_in operator")


values_not_in = [
    "homo sapiens",
    "mus musculus",
    "mus musculus x mus spretus",
    "human adenovirus 2",
]
# The message states "not in" so the log matches the operator used below
logging.info(
    "Searching for 'Organism' with values not in [%s]" % (",".join(values_not_in))
)

# and filters: a single filter whose value is the whole list of excluded values
and_filters = [{"name": "Organism", "value": values_not_in, "operator": "not_in"}]

main_attributes = []
query = {"and_filters": and_filters}

# run the query through the helper imported from utils and keep what it returns
received_results_data = query_the_search_ending(query, main_attributes)
60 changes: 58 additions & 2 deletions manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,25 @@ def sql_results_to_panda():
pass


@manager.command
def restore_postgresql_database():
    """
    Restore the PostgreSQL database by calling restore_database
    from omero_search_engine.database.utils
    """
    # Imported inside the command, like the other commands in this file do
    from omero_search_engine.database import utils as database_utils

    database_utils.restore_database()


@manager.command
@manager.option(
"-r",
"--resource",
help="resource name, creating all the indexes for all the resources is the default", # noqa
)
def get_index_data_from_database(resource="all"):
@manager.option(
"-b",
"--backup",
help="if True, backup will be called ", # noqa
)
def get_index_data_from_database(resource="all", backup="True"):
"""
insert data in Elasticsearch index for each resource
It gets the data from postgres database server
Expand All @@ -132,7 +144,9 @@ def get_index_data_from_database(resource="all"):
get_insert_data_to_index,
save_key_value_buckets,
)
import json

backup = json.loads(backup.lower())
if resource != "all":
sql_st = sqls_resources.get(resource)
if not sql_st:
Expand All @@ -148,7 +162,8 @@ def get_index_data_from_database(resource="all"):
test_indexing_search_query(deep_check=False, check_studies=True)

# backup the index data
backup_elasticsearch_data()
if backup:
backup_elasticsearch_data()


# set configurations
Expand Down Expand Up @@ -360,6 +375,44 @@ def restore_elasticsearch_data():
restore_indices_data()


@manager.command
@manager.option("-s", "--screen_name", help="Screen name, or part of it")
@manager.option("-p", "--project_name", help="Project name, or part of it")
def data_validator(screen_name=None, project_name=None):
    """
    Check the key-value pairs for trailing and leading spaces.
    It also checks the key-value pairs for duplication.
    It can check all the projects and screens,
    or it can run for a specific project or screen
    (only one of screen_name and project_name may be given).
    The output is a collection of CSV files; each check usually generates three files:
    The main file contains image details (e.g. image id)
    in addition to the key and the value.
    one file for screens and one for projects.
    Each file contains the screen name (project name),
    the key-value pair which has the issue and the total number of affected
    images for each row.
    """
    from datetime import datetime

    if screen_name and project_name:
        print("Either screen name or project name is allowed")
        # Stop here: the two filters are mutually exclusive, so running the
        # checks after reporting the problem would contradict the message.
        return

    from omero_search_engine.validation.omero_keyvalue_data_validator import (
        check_for_heading_space,
        check_for_trailing_space,
        check_duplicated_keyvalue_pairs,
    )

    # A timestamp is taken before each check so the duration of every
    # step can be read from the final message.
    start = datetime.now()
    check_for_trailing_space(screen_name, project_name)
    start1 = datetime.now()
    check_for_heading_space(screen_name, project_name)
    start2 = datetime.now()
    check_duplicated_keyvalue_pairs(screen_name, project_name)
    end = datetime.now()
    print("start: %s, start1: %s, start2: %s, end: %s" % (start, start1, start2, end))


@manager.command
def test_container_key_value():
from omero_search_engine.validation.results_validator import (
Expand All @@ -370,4 +423,7 @@ def test_container_key_value():


if __name__ == "__main__":
    import flask_script

    # Switch off capture_all_args on the flask_script Command class
    # before the manager runs the requested command.
    flask_script.Command.capture_all_args = False
    manager.run()
25 changes: 21 additions & 4 deletions omero_search_engine/api/v1/resources/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
"screen": {"name": "name", "description": "description"},
}

# Module-level holders for the "main_attributes" section of the submitted query.
# determine_search_results_ assigns both of them and run_query reads
# res_and_main_attributes back through a `global` statement.
# NOTE(review): they are only reassigned when the query has main_attributes,
# so values from an earlier call stay in place otherwise - confirm this is
# intended. res_or_main_attributes does not appear to be read in the code
# visible here - TODO confirm where it is used.
res_and_main_attributes = None
res_or_main_attributes = None


def check_get_names(idr_, resource, attribute, return_exact=False):
# check the idr name and return the resource and possible values
Expand Down Expand Up @@ -107,8 +110,10 @@ def adjust_resource(self):
)
if len(ac_value) == 1:
self.value = ac_value[0]
else:
elif len(ac_value) == 0:
self.value = -1
else:
self.value = ac_value
"""
pr_names = get_resource_names(self.resource)
if not self.value in pr_names:
Expand Down Expand Up @@ -337,6 +342,7 @@ def get_image_non_image_query(self):

def run_query(self, query_, resource):
main_attributes = {}

query = {"and_filters": [], "or_filters": []}

if query_.get("and_filters"):
Expand Down Expand Up @@ -398,6 +404,11 @@ def run_query(self, query_, resource):
# res = search_query(query, resource, bookmark,
# self.raw_elasticsearch_query,
# main_attributes,return_containers=self.return_containers)
global res_and_main_attributes, res_or_main_attributes
if res_and_main_attributes:
main_attributes["and_main_attributes"] = (
main_attributes.get("and_main_attributes") + res_and_main_attributes
)
if resource == "image" and self.return_containers:
res = search_query(
query,
Expand Down Expand Up @@ -633,6 +644,12 @@ def determine_search_results_(query_, return_columns=False, return_containers=Fa
and_filters = query_.get("query_details").get("and_filters")
or_filters = query_.get("query_details").get("or_filters")
and_query_groups = []
main_attributes = query_.get("main_attributes")
global res_and_main_attributes, res_or_main_attributes
if main_attributes:
res_and_main_attributes = main_attributes.get("and_main_attributes")
res_or_main_attributes = main_attributes.get("or_main_attributes")

columns_def = query_.get("columns_def")
or_query_groups = []
if and_filters and len(and_filters) > 0:
Expand Down Expand Up @@ -785,9 +802,9 @@ def add_local_schemas_to(resolver, schema_folder, base_uri, schema_ext=".json"):


def query_validator(query):
query_schema_file = (
"omero_search_engine/api/v1/resources/schemas/query_data.json" # noqa
)
print("TRoz", query)
main_dir = os.path.abspath(os.path.dirname(__file__))
query_schema_file = os.path.join(main_dir, "schemas", "query_data.json")
base_uri = "file:" + abspath("") + "/"
with open(query_schema_file, "r") as schema_f:
query_schema = json.loads(schema_f.read())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
},
"value": {
"name":"value",
"type": "string"
"type": ["array", "string"]
},
"operator": {
"name": "operator",
"type": "string",
"enum": ["equals", "not_equals", "contains","not_contains"]
"enum": ["equals", "not_equals", "contains", "not_contains", "in", "not_in"]
}
,"resource": {
"name": "resource",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ parameters:
description: operator, default equals
in: query
type: string
enum: ['equals', 'not_equals', 'contains', 'not_contains']
enum: ['equals', 'not_equals', 'contains', 'not_contains', 'in', 'not_in']
- name: case_sensitive
description: case sensitive query, default False
in: query
Expand Down
Loading

0 comments on commit 6b2469b

Please sign in to comment.