Merge branch 'main' into tools

khaledk2 · Aug 1, 2024 · 0fb4ae0 · 0fb4ae0
2 parents a237f8e + d368bad
commit 0fb4ae0
Show file tree

Hide file tree

Showing 25 changed files with 1,337 additions and 153 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -48,6 +48,13 @@ jobs:
           python manage.py set_database_configuration -u localhost -s  ${{ job.services.postgres.ports[5432] }} -n postgress -p passwprd
           # configure elasticsearch
           python manage.py set_elasticsearch_configuration -e localhost:${{ job.services.elasticsearch.ports[9200] }}
+          # download and extract the database backup file           
+          wget https://downloads.openmicroscopy.org/images/omero_db_searchengine.zip -P app_data
+          unzip app_data/omero_db_searchengine.zip -d app_data/          
+          # restore the omero database
+          python manage.py restore_postgresql_database
+          # run the indexing
+          python manage.py get_index_data_from_database -b False
           # run tests
           python -m unittest discover -s unit_tests
   upload:
@@ -85,6 +92,6 @@ jobs:
         uses: docker/build-push-action@v2
         with:
           context: .
-          file: deployment/docker/centos/Dockerfile
+          file: deployment/docker/rockylinux/Dockerfile
           push: true
           tags: ${{ join(fromJson(steps.gettags.outputs.tags)) }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,12 +1,12 @@
 ---
 repos:
   - repo: https://github.com/psf/black
-    rev: 23.1.0
+    rev: 24.4.2
     hooks:
       - id: black
         args: [--target-version=py35]
   - repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
+    rev: 7.1.0
     hooks:
       - id: flake8
         args: [

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,8 +1,11 @@
+0.5.4 (March 2024):
+--------------------
+- Support Rocky Linux 9  [#93](https://github.com/ome/omero_search_engine/pull/93)
+
 0.5.3 (September 2023):
 -----------------------
 - Secure the connection with elasticsearch [#92](https://github.com/ome/omero_search_engine/pull/92)
 
-
 0.5.2 (June 2023):
 ------------------
 - Return all the available values for a key in a container [#77](https://github.com/ome/omero_search_engine/pull/77)

diff --git a/app_data/test_index_data.json b/app_data/test_index_data.json
@@ -96,5 +96,27 @@
             "validation screen"
          ]
       ]
-   }
+   },
+    "query_in": {
+       "image": [
+         [
+          "Gene Symbol",
+          [
+             "Duoxa2",
+             "Bach2",
+             "Cxcr2",
+             "Mysm1"
+          ]
+          ],
+          [
+          "Organism",
+          [
+             "homo sapiens",
+             "mus musculus",
+             "mus musculus x mus spretus",
+             "human adenovirus 2"
+          ]
+       ]
+          ]
+    }
 }
diff --git a/configurations/app_config.yml b/configurations/app_config.yml
@@ -16,3 +16,4 @@ verify_certs: False
 ELASTIC_PASSWORD: elasticsearch_user_password
 SEARCHENGINE_LOGS_FOLDER: path/to/search/logs/folder
 SEARCHENGINE_ADMIN_PASSWD: "hashed_admin_password"
+BASE_FOLDER: /etc/searchengine/
diff --git a/deployment/docker/rockylinux/.dockerignore b/deployment/docker/rockylinux/.dockerignore
@@ -0,0 +1,3 @@
+Dockerfile
+.git
+.gitignore
diff --git a/deployment/docker/rockylinux/Dockerfile b/deployment/docker/rockylinux/Dockerfile
@@ -0,0 +1,22 @@
+#docker build . -t searchengine
+# docker build . -f deployment/docker/rockylinux/Dockerfile -t searchengine
+FROM rockylinux/rockylinux:9.0
+USER root
+# Install the build tooling and Python in a single layer; the dnf caches are
+# removed in the same RUN so they are not persisted in an earlier layer.
+RUN dnf update -y && \
+    dnf groupinstall "Development Tools" -y && \
+    dnf install -y libpq-devel python3-pip python3-devel.x86_64 && \
+    dnf clean all && rm -rf /var/cache/dnf /var/cache/yum
+RUN mkdir /searchengine
+# The entrypoint scripts are run from WORKDIR, so they are copied to its root.
+COPY deployment/docker/rockylinux/start_gunicorn_serch_engine.sh /searchengine
+COPY deployment/docker/rockylinux/run_app.sh /searchengine
+COPY . /searchengine
+# Folders under /etc/searchengine that are expected at runtime.
+RUN mkdir -p /etc/searchengine/chachedata /etc/searchengine/logs
+WORKDIR /searchengine
+RUN pip3 install -r  requirements.txt
+RUN pip3 install gunicorn
+EXPOSE 5577
+ENTRYPOINT ["bash", "run_app.sh"]
diff --git a/deployment/docker/rockylinux/run_app.sh b/deployment/docker/rockylinux/run_app.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Container entry point: starts the search engine or runs a manage.py command.
+#   no arguments, or "run_app":  start the app with gunicorn
+#   "run_app <prefix>":          start the app and pass <prefix> to the start script
+#   anything else:               run it as a manage.py command
+echo "$@"
+
+#test if the configuration file exists, if not it will copy it from the app configuration folder
+test -f /etc/searchengine/.app_config.yml || cp /searchengine/configurations/app_config.yml /etc/searchengine/.app_config.yml
+
+#Check the script input
+# "$#" and "$*" are used in the tests because "$@" expands to several words
+# when more than one argument is given, which makes "[" fail with an error.
+if [ "$#" -eq 0 ] || [ "$*" = "run_app" ]; then
+  echo "Starting the app"
+  bash start_gunicorn_serch_engine.sh
+elif [[ "$*" == run_app* ]]; then
+  url_perfix=${@/run_app/}
+  echo using prefix: $url_perfix
+  bash start_gunicorn_serch_engine.sh $url_perfix
+else
+  echo "$@"
+  python3 manage.py "$@"
+fi
diff --git a/deployment/docker/rockylinux/start_gunicorn_serch_engine.sh b/deployment/docker/rockylinux/start_gunicorn_serch_engine.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+# Starts the search engine with gunicorn. An optional argument is exported as
+# SCRIPT_NAME (surrounded by slashes) before gunicorn is started.
+NAME="omero_search_engine"
+APPPATH=/searchengine
+SOCKFILE=/etc/searchengine/sock3 #change this to project_dir/sock (new file will be created)
+echo "Starting $NAME as `whoami`"
+export PATH="$APPPATH:$PATH"
+echo "staring the app"
+# Create the run directory if it doesn't exist
+RUNDIR=$(dirname $SOCKFILE)
+echo "$RUNDIR"
+test -d $RUNDIR || mkdir -p $RUNDIR
+# Create the logs directory itself (not only its parent) if it doesn't exist
+LOGSDIR=/etc/searchengine/logs
+test -d $LOGSDIR || mkdir -p $LOGSDIR
+echo "Start Gunicorn ...."
+echo "$HOME"
+pwd
+cd $APPPATH
+# "$*" is always a single word, unlike "$@" which breaks "[" with several arguments
+if [ -n "$*" ]; then
+  echo Run with SCRIPT_NAME=$*
+  export SCRIPT_NAME=/"$*"/
+fi
+# --log-file is gunicorn's alias of --error-logfile, so the error log is set once;
+# the former "-error-logfile=..." (single dash) was parsed as the -e/--env option.
+exec gunicorn "omero_search_engine:create_app('production')" -b 0.0.0.0:5577 --timeout 0 --name "$NAME"   --bind=unix:$SOCKFILE  --log-file=$LOGSDIR/engine_gunilog.log --access-logfile=$LOGSDIR/engine_access.log  --workers 4
diff --git a/examples/search.py b/examples/search.py
@@ -88,5 +88,9 @@ def call_omero_return_results(url, data=None, method="post"):
         % (len(received_results), total_results, page, total_pages, bookmark)
     )
 
-# 2000 /11686633, page: 1/11687, bookmark: 109600
-# 2000 /12225067, page: 1/12226, bookmark:  109600
+# Another example using the 'in' operator; the items are sent inside value
+# as a string, and the list items are separated by ','
+logging.info("Using in operator")
+url = "%s%s?key=Gene Symbol&value=Pdgfc,Rnase10&operator=in" % (base_url, image_search)
+bookmark, total_results, total_pages = call_omero_return_results(url, method="get")
+logging.info("%s,%s" % (total_results, total_pages))
diff --git a/examples/using_in_operator.py b/examples/using_in_operator.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2024 University of Dundee & Open Microscopy Environment.
+# All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+from utils import query_the_search_ending, logging
+
+# It is similar to using the 'in' operator in an SQL statement:
+# rather than having multiple 'or' conditions,
+# it will only use a single condition.
+
+# The following example will search for the images which have any of the 'Gene Symbol'
+# values in this list ["Duoxa2", "Bach2", "Cxcr2", "Mysm1"]
+
+# and filters
+
+logging.info("Example of using in operator")
+
+
+values_in = ["Duoxa2", "Bach2", "Cxcr2", "Mysm1"]
+logging.info("Searching for 'Gene Symbol' with values in [%s]" % (",".join(values_in)))
+and_filters = [{"name": "Gene Symbol", "value": values_in, "operator": "in"}]
+
+main_attributes = []
+query = {"and_filters": and_filters}
+#
+recieved_results_data = query_the_search_ending(query, main_attributes)
diff --git a/examples/using_not_in_operator.py b/examples/using_not_in_operator.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2024 University of Dundee & Open Microscopy Environment.
+# All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+from utils import query_the_search_ending, logging
+
+# It is similar to using the 'not in' operator in an SQL statement:
+# rather than having multiple 'and' conditions with not_equals operators,
+# it will only use a single condition.
+
+# The following example will search for the images which do not have any of the
+# 'Organism' values in this list
+# ["homo sapiens","mus musculus","mus musculus x mus spretus","human adenovirus 2"]
+
+# and filters
+
+logging.info("Example of using not_in operator")
+
+
+values_not_in = [
+    "homo sapiens",
+    "mus musculus",
+    "mus musculus x mus spretus",
+    "human adenovirus 2",
+]
+logging.info("Searching for 'Organism' with values in [%s]" % (",".join(values_not_in)))
+and_filters = [{"name": "Organism", "value": values_not_in, "operator": "not_in"}]
+
+main_attributes = []
+query = {"and_filters": and_filters}
+#
+received_results_data = query_the_search_ending(query, main_attributes)
diff --git a/manage.py b/manage.py
@@ -114,13 +114,25 @@ def sql_results_to_panda():
     pass
 
 
+@manager.command
+def restore_postgresql_database():
+    from omero_search_engine.database.utils import restore_database
+
+    restore_database()
+
+
 @manager.command
 @manager.option(
     "-r",
     "--resource",
     help="resource name, creating all the indexes for all the resources is the default",  # noqa
 )
-def get_index_data_from_database(resource="all"):
+@manager.option(
+    "-b",
+    "--backup",
+    help="if True, backup will be called ",  # noqa
+)
+def get_index_data_from_database(resource="all", backup="True"):
     """
     insert data in Elasticsearch index for each resource
     It gets the data from postgres database server
@@ -132,7 +144,9 @@ def get_index_data_from_database(resource="all"):
         get_insert_data_to_index,
         save_key_value_buckets,
     )
+    import json
 
+    backup = json.loads(backup.lower())
     if resource != "all":
         sql_st = sqls_resources.get(resource)
         if not sql_st:
@@ -148,7 +162,8 @@ def get_index_data_from_database(resource="all"):
         test_indexing_search_query(deep_check=False, check_studies=True)
 
     # backup the index data
-    backup_elasticsearch_data()
+    if backup:
+        backup_elasticsearch_data()
 
 
 # set configurations
@@ -351,6 +366,47 @@ def restore_elasticsearch_data():
     restore_indices_data()
 
 
+@manager.command
+@manager.option("-s", "--screen_name", help="Screen name, or part of it")
+@manager.option("-p", "--project_name", help="Project name, or part of it")
+def data_validator(screen_name=None, project_name=None):
+    """
+    Check the key-value pairs for trailing and leading (heading) spaces.
+    It also checks the key-value pair duplication.
+    It can check all the projects and screens.
+    Also, it can run for a specific project or screen
+    (only one of screen_name and project_name may be given).
+    The output is a collection of CSV files; each check usually generates three files:
+    The main file contains image details (e.g. image id)
+    in addition to the key and the value.
+    one file for screens and one for projects.
+    Each file contains the screen name (project name),
+      the key-value pair which has the issue and the total number of affected
+      images for each row.
+    """
+    from datetime import datetime
+
+    if screen_name and project_name:
+        print("Either screen name or project name is allowed")
+        # Only one of the two filters is allowed, so do not run the checks
+        return
+
+    from omero_search_engine.validation.omero_keyvalue_data_validator import (
+        check_for_heading_space,
+        check_for_trailing_space,
+        check_duplicated_keyvalue_pairs,
+    )
+
+    # The time is recorded before each check and printed at the end
+    start = datetime.now()
+    check_for_trailing_space(screen_name, project_name)
+    start1 = datetime.now()
+    check_for_heading_space(screen_name, project_name)
+    start2 = datetime.now()
+    check_duplicated_keyvalue_pairs(screen_name, project_name)
+    end = datetime.now()
+    print("start: %s, start1: %s, start2: %s, end: %s" % (start, start1, start2, end))
+
+
 @manager.command
 def test_container_key_value():
     from omero_search_engine.validation.results_validator import (
@@ -404,4 +457,7 @@ def set_logs_folder(logs_folder=None):
 
 
 if __name__ == "__main__":
+    from flask_script import Command
+
+    Command.capture_all_args = False
     manager.run()