
Commit

fix merge issues
khaledk2 committed Sep 27, 2024
1 parent 739cb40 commit 8848714
Showing 13 changed files with 251 additions and 152 deletions.
31 changes: 7 additions & 24 deletions .github/workflows/main.yml
@@ -7,7 +7,7 @@ on:
branches: [ main ]
jobs:
build:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
services:
postgres:
image: postgres:16
@@ -22,19 +22,6 @@ jobs:
--health-interval 10s
--health-timeout 5s
--health-retries 5
postgres_2:
image: postgres:16
env:
POSTGRES_USER: postgress
POSTGRES_PASSWORD: passwprd
POSTGRES_DB: omero_train
ports:
- 5433/tcp
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
elasticsearch:
image: elasticsearch:8.8.1
@@ -58,21 +45,17 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt;
# Configure database url
python manage.py set_database_configuration -u localhost -s ${{ job.services.postgres.ports[5432] }} -n postgress -p passwprd -w omero1 -d omero -b omero.pgdump
#python manage.py set_database_configuration -u localhost -s ${{ job.services.postgres.ports[5432] }} -n postgress -p passwprd
python manage.py set_database_configuration -u localhost -s ${{ job.services.postgres.ports[5432] }} -n postgress -p passwprd -w omero1 -d omero -b omero.pgdump
# configure elasticsearch
python manage.py set_elasticsearch_configuration -e localhost:${{ job.services.elasticsearch.ports[9200] }}
# download and extract the database backup file
wget https://downloads.openmicroscopy.org/images/omero_db_searchengine.zip -P app_data
unzip app_data/omero_db_searchengine.zip -d app_data/
wget https://github.com/khaledk2/ice-archh-64/releases/download/new_re_db/omero_train.zip -P app_data
unzip app_data/omero_train.zip -d app_data/
wget https://downloads.openmicroscopy.org/images/omero_db_searchengine.zip -P app_data
unzip app_data/omero_db_searchengine.zip -d app_data/
# restore the omero database
python manage.py restore_postgresql_database
python manage.py restore_postgresql_database
# run indexing
python manage.py get_index_data_from_database -b False
# set up the second database
python manage.py set_database_configuration -u localhost -s ${{ job.services.postgres_2.ports[5433] }} -n postgress -p passwprd -w omero_train -d omero_train -b omero_train.pgdump
#python manage.py restore_postgresql_database -s omero1
python manage.py get_index_data_from_database -b False
# run tests
python -m unittest discover -s unit_tests
upload:
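For reference, the indexing steps in this job can be reproduced outside CI. The sketch below is a minimal local approximation, assuming a local PostgreSQL on port 5432 and Elasticsearch on port 9200; the commands, credentials, and backup file are taken verbatim from the workflow above, with only the service ports substituted:

# local approximation of the CI steps above (host and ports assumed)
pip install -r requirements.txt
python manage.py set_database_configuration -u localhost -s 5432 -n postgress -p passwprd -w omero1 -d omero -b omero.pgdump
python manage.py set_elasticsearch_configuration -e localhost:9200
wget https://downloads.openmicroscopy.org/images/omero_db_searchengine.zip -P app_data
unzip app_data/omero_db_searchengine.zip -d app_data/
python manage.py restore_postgresql_database
python manage.py get_index_data_from_database -b False
python -m unittest discover -s unit_tests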
5 changes: 3 additions & 2 deletions configurations/app_config.yml
@@ -4,12 +4,13 @@ ASYNCHRONOUS_SEARCH : True
ELASTICSEARCH_URL : "https://localhost:9200"
IDR_TEST_FILE_URL : "https://raw.githubusercontent.com/IDR/idr.openmicroscopy.org/master/_data/studies.tsv"
PAGE_SIZE : 1000
CACHE_ROWS : 10000
MAX_RETUNED_ITEMS : 1700000
CACHE_ROWS : 1000
MAX_RETUNED_ITEMS : 1000
ELASTICSEARCH_BACKUP_FOLDER: "path/to/elasticsearch/backup/folder"
verify_certs: False
ELASTIC_PASSWORD: elasticsearch_user_password
BASE_FOLDER: /etc/searchengine/
#NO_PROCESSES: 1
DATA_SOURCES:
- name: omero1
DATABASE:
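DATA_SOURCES is a list, so additional sources would presumably be configured as further entries of the same shape. A hypothetical sketch — the second source name is illustrative, and the DATABASE sub-keys are elided here as they are in the diff above:

DATA_SOURCES:
  - name: omero1
    DATABASE:
      ...
  - name: omero_train
    DATABASE:
      ...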
21 changes: 13 additions & 8 deletions manage.py
@@ -123,7 +123,6 @@ def restore_postgresql_database(source="all"):

restore_database(source)


@manager.command
@manager.option(
"-r",
@@ -176,24 +175,24 @@ def get_index_data_from_database(resource="all", source="all", backup="True"):
if resource.lower() != "all" and resource.lower() != res.lower():
continue
get_insert_data_to_index(sql_st, res, data_source, clean_index)

save_key_value_buckets(
resource_table_=None,
data_source=data_source,
clean_index=clean_index,
only_values=False,
)

print("!Done for data_source: %s from %s" % (data_source, search_omero_app.config.database_connectors.keys()))
if clean_index:
clean_index = False

# validate the indexing
#test_indexing_search_query(
# source=data_source, deep_check=False, check_studies=True
#)

# backup the index data
# if backup:
# backup_elasticsearch_data()
# backup the index data
if backup:
backup_elasticsearch_data()


# set configurations
@@ -334,6 +333,12 @@ def set_no_processes(no_processes=None):


@manager.command
@manager.option(
"-d",
"--data_source",
help="data source name, the default is all", # noqa
)

@manager.option(
"-r",
"--resource",
@@ -345,15 +350,15 @@ def set_no_processes(no_processes=None):
help="creating the elastic search index if set to True", # noqa
)
@manager.option("-o", "--only_values", help="creating cached values only ")
def cache_key_value_index(resource=None, create_index=None, only_values=None):
def cache_key_value_index(resource=None, data_source='all', create_index=None, only_values=None):
"""
Cache the value bucket for each value for each resource
"""
from omero_search_engine.cache_functions.elasticsearch.transform_data import (
save_key_value_buckets,
)

save_key_value_buckets(resource, create_index, only_values)
save_key_value_buckets(resource, data_source, create_index, only_values)


@manager.command
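Given the new -d/--data_source option registered above, caching the key/value buckets can presumably be scoped to a single source; hypothetical invocations (resource and source names assumed):

python manage.py cache_key_value_index -r image -d omero1
python manage.py cache_key_value_index                     # default: all data sources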
2 changes: 2 additions & 0 deletions omero_search_engine/__init__.py
@@ -82,6 +82,7 @@ def create_app(config_name="development"):
scheme="https",
http_auth=("elastic", ELASTIC_PASSWORD),
)

search_omero_app.config.database_connectors = app_config.database_connectors
print(search_omero_app.config.database_connectors)
search_omero_app.config["es_connector"] = es_connector
@@ -104,6 +105,7 @@ def create_app(config_name="development"):

search_omero_app.logger.setLevel(logging.INFO)
search_omero_app.logger.info("app assistant startup")

return search_omero_app


9 changes: 6 additions & 3 deletions omero_search_engine/api/v1/resources/query_handler.py
@@ -459,13 +459,13 @@ def search_query(
search_omero_app.logger.info(
"-------------------------------------------------"
) # noqa
search_omero_app.logger.info("1: %s"%query)
search_omero_app.logger.info("2: %s"%main_attributes)
search_omero_app.logger.info(query)
search_omero_app.logger.info(main_attributes)
search_omero_app.logger.info(resource)
search_omero_app.logger.info(
"-------------------------------------------------"
) # noqa
search_omero_app.logger.info(("1... %s, 2....%s") % (resource, query))
search_omero_app.logger.info(("%s, %s") % (resource, query))
if not main_attributes:
q_data = {"query": {"query_details": query}}
elif resource == "image":
@@ -493,6 +493,9 @@ def search_query(
ress = search_resource_annotation(
resource, q_data.get("query"), return_containers=return_containers, data_source=data_source
)
if type(ress) is str:
return ress

ress["Error"] = "none"
return ress
except Exception as ex:
24 changes: 14 additions & 10 deletions omero_search_engine/api/v1/resources/resource_analyser.py
@@ -41,7 +41,7 @@
}}}}}}}}"""
)
key_number_search_template = Template(
"""
r"""
{
"size":0,
"query":{ "bool" : {"must": {
@@ -261,7 +261,11 @@ def get_number_of_buckets(key, data_source, res_index):
def get_all_values_for_a_key(table_, data_source, key):
res_index = resource_elasticsearchindex.get(table_)
query = key_number_search_template.substitute(key=key, data_source=data_source)
res = search_index_for_value(res_index, query)
try:
res = search_index_for_value(res_index, query)
except Exception as ex:
print("Query: %s Error: %s"%(query,str(ex)))
raise ex
number_of_buckets = (
res.get("aggregations")
.get("value_search")
@@ -333,7 +337,6 @@ def get_values_for_a_key(table_, key, data_source):
start_time = time.time()
res = search_index_for_value(res_index, query)
query_time = "%.2f" % (time.time() - start_time)
print("TIME ...", query_time)
returned_results = []
if res.get("aggregations"):
for bucket in (
@@ -889,7 +892,7 @@ def get_resource_attribute_values(
return returned_results


def get_resource_names(resource, name=None, description=False):
def get_resource_names(resource, name=None, description=False, data_source=None):
"""
return resources names attributes
It works for projects and screens but can be extended.
@@ -898,21 +901,22 @@ def get_resource_names(resource, name=None, description=False):
return build_error_message(
"This release does not support search by description."
)

if resource != "all":
returned_results = get_the_results(resource, name, description)
returned_results = get_the_results(resource, name, description, data_source)
else:
returned_results = {}
ress = ["project", "screen"]
for res in ress:
returned_results[res] = get_the_results(res, name, description)

returned_results[res] = get_the_results(res, name, description, data_source)
return returned_results


def get_the_results(resource, name, description, es_index="key_values_resource_cach"):
def get_the_results(resource, name, description, data_source, es_index="key_values_resource_cach"):
returned_results = {}
query = key_values_buckets_template_2.substitute(resource=resource)
if data_source:
query = key_values_buckets_template_with_data_source.substitute(resource=resource, data_source=data_source)
else:
query = key_values_buckets_template_2.substitute(resource=resource)
results_ = connect_elasticsearch(
es_index, query
) # .search(index=es_index, body=query)
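For illustration, calls mirroring the extended signature (the data-source name is hypothetical); passing no data_source falls back to the original template without a data-source filter:

# project names restricted to one data source (source name assumed)
names = get_resource_names("project", data_source="omero1")
# projects and screens across all sources, as before
all_names = get_resource_names("all")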
@@ -12,6 +12,11 @@ parameters:
in: query
type: string
required: false
- name: data_source
in: query
type: string
required: false
description: If provided, the search results are restricted to the specified data source; otherwise, results from all data sources are returned.
definitions:
names:
type: array
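Assuming a running instance of the search engine, the new parameter is supplied in the query string. The host, port, and route below are placeholders for illustration only, not taken from this commit:

curl "http://localhost:5577/resources/project/names/?data_source=omero1"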
23 changes: 22 additions & 1 deletion omero_search_engine/api/v1/resources/urls.py
@@ -25,6 +25,7 @@
build_error_message,
adjust_query_for_container,
get_data_sources,
check_empty_string,
)
from omero_search_engine.api.v1.resources.resource_analyser import (
search_value_for_resource,
@@ -91,6 +92,8 @@ def search_resource_page(resource_table):
pagination_dict = data.get("pagination")
return_containers = data.get("return_containers")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if return_containers:
return_containers = json.loads(return_containers.lower())

@@ -172,6 +175,8 @@ def search_resource(resource_table):
if validation_results == "OK":
return_containers = request.args.get("return_containers")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if return_containers:
return_containers = json.loads(return_containers.lower())

@@ -190,6 +195,8 @@ def get_values_using_value(resource_table):
"""
value = request.args.get("value")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if not value:
return jsonify(
build_error_message("Error: {error}".format(error="No value is provided "))
@@ -255,6 +262,8 @@ def search_values_for_a_key(resource_table):
# if it sets to true, a CSV file content will be sent instead of dict
csv = request.args.get("csv")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if csv:
try:
csv = json.loads(csv.lower())
@@ -276,6 +285,8 @@ def get_resource_keys(resource_table):
"""
mode = request.args.get("mode")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
resource_keys = get_resource_attributes(resource_table, data_source=data_source, mode=mode)
return jsonify(resource_keys)

@@ -315,14 +326,19 @@ def get_resource_names_(resource_table):

value = request.args.get("value")
description = request.args.get("use_description")
data_source = request.args.get("data_source")
data_source = check_empty_string(data_source)
if data_source:
data_source=data_source.strip(",")
data_source=json.dumps(data_source)
if description:
if description.lower() in ["true", "false"]:
description = json.loads(description.lower())
elif description == "1":
description = True
else:
description = False
return jsonify(get_resource_names(resource_table, value, description))
return jsonify(get_resource_names(resource_table, value, description, data_source))


@resources.route("/submitquery/containers/", methods=["POST"])
@@ -339,6 +355,8 @@ def submit_query_return_containers():
adjust_query_for_container(query)
return_columns = request.args.get("return_columns")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if return_columns:
try:
return_columns = json.loads(return_columns.lower())
@@ -367,6 +385,8 @@ def submit_query():
adjust_query_for_container(query)
return_columns = request.args.get("return_columns")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if return_columns:
try:
return_columns = json.loads(return_columns.lower())
@@ -392,6 +412,7 @@ def search(resource_table):
operator = request.args.get("operator")
bookmark = request.args.get("bookmark")
data_source = request.args.get("data_source")
data_source = check_empty_string(data_source)
return_containers = request.args.get("return_containers")
if return_containers:
return_containers = json.loads(return_containers.lower())
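The strip-if-present handling of data_source recurs in nearly every handler in this file; a small helper, sketched below (not part of this commit, purely illustrative), could consolidate it:

def normalized_data_source(request):
    # return the data_source query argument with surrounding whitespace removed,
    # or None/empty as-is when the argument was not supplied
    data_source = request.args.get("data_source")
    if data_source:
        data_source = data_source.strip()
    return data_source

Each handler could then start with data_source = normalized_data_source(request).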