
Commit

fix merge issues
khaledk2 committed Sep 27, 2024
1 parent 739cb40 commit 8848714
Showing 13 changed files with 251 additions and 152 deletions.
31 changes: 7 additions & 24 deletions .github/workflows/main.yml
@@ -7,7 +7,7 @@ on:
branches: [ main ]
jobs:
build:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
services:
postgres:
image: postgres:16
@@ -22,19 +22,6 @@ jobs:
--health-interval 10s
--health-timeout 5s
--health-retries 5
postgres_2:
image: postgres:16
env:
POSTGRES_USER: postgress
POSTGRES_PASSWORD: passwprd
POSTGRES_DB: omero_train
ports:
- 5433/tcp
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
elasticsearch:
image: elasticsearch:8.8.1
@@ -58,21 +45,17 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt;
# Configure database url
python manage.py set_database_configuration -u localhost -s ${{ job.services.postgres.ports[5432] }} -n postgress -p passwprd -w omero1 -d omero -b omero.pgdump
#python manage.py set_database_configuration -u localhost -s ${{ job.services.postgres.ports[5432] }} -n postgress -p passwprd
python manage.py set_database_configuration -u localhost -s ${{ job.services.postgres.ports[5432] }} -n postgress -p passwprd -w omero1 -d omero -b omero.pgdump
# configure elasticsearch
python manage.py set_elasticsearch_configuration -e localhost:${{ job.services.elasticsearch.ports[9200] }}
# download and extract the database backup file
wget https://downloads.openmicroscopy.org/images/omero_db_searchengine.zip -P app_data
unzip app_data/omero_db_searchengine.zip -d app_data/
wget https://github.com/khaledk2/ice-archh-64/releases/download/new_re_db/omero_train.zip -P app_data
unzip app_data/omero_train.zip -d app_data/
wget https://downloads.openmicroscopy.org/images/omero_db_searchengine.zip -P app_data
unzip app_data/omero_db_searchengine.zip -d app_data/
# restore the omero database
python manage.py restore_postgresql_database
python manage.py restore_postgresql_database
# run indexing
python manage.py get_index_data_from_database -b False
# set up the second database
python manage.py set_database_configuration -u localhost -s ${{ job.services.postgres_2.ports[5433] }} -n postgress -p passwprd -w omero_train -d omero_train -b omero_train.pgdump
#python manage.py restore_postgresql_database -s omero1
python manage.py get_index_data_from_database -b False
# run tests
python -m unittest discover -s unit_tests
upload:
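For reference, the indexing steps in this job can be reproduced outside CI. The sketch below is a minimal local approximation, assuming a local PostgreSQL on port 5432 and Elasticsearch on port 9200; the commands, credentials, and backup file are taken verbatim from the workflow above, with only the service ports substituted:

# local approximation of the CI steps above (host and ports assumed)
pip install -r requirements.txt
python manage.py set_database_configuration -u localhost -s 5432 -n postgress -p passwprd -w omero1 -d omero -b omero.pgdump
python manage.py set_elasticsearch_configuration -e localhost:9200
wget https://downloads.openmicroscopy.org/images/omero_db_searchengine.zip -P app_data
unzip app_data/omero_db_searchengine.zip -d app_data/
python manage.py restore_postgresql_database
python manage.py get_index_data_from_database -b False
python -m unittest discover -s unit_tests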
5 changes: 3 additions & 2 deletions configurations/app_config.yml
@@ -4,12 +4,13 @@ ASYNCHRONOUS_SEARCH : True
ELASTICSEARCH_URL : "https://localhost:9200"
IDR_TEST_FILE_URL : "https://raw.githubusercontent.com/IDR/idr.openmicroscopy.org/master/_data/studies.tsv"
PAGE_SIZE : 1000
CACHE_ROWS : 10000
MAX_RETUNED_ITEMS : 1700000
CACHE_ROWS : 1000
MAX_RETUNED_ITEMS : 1000
ELASTICSEARCH_BACKUP_FOLDER: "path/to/elasticsearch/backup/folder"
verify_certs: False
ELASTIC_PASSWORD: elasticsearch_user_password
BASE_FOLDER: /etc/searchengine/
#NO_PROCESSES: 1
DATA_SOURCES:
- name: omero1
DATABASE:
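DATA_SOURCES is a list, so additional sources would presumably be configured as further entries of the same shape. A hypothetical sketch — the second source name is illustrative, and the DATABASE sub-keys are elided here as they are in the diff above:

DATA_SOURCES:
  - name: omero1
    DATABASE:
      ...
  - name: omero_train
    DATABASE:
      ...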
21 changes: 13 additions & 8 deletions manage.py
@@ -123,7 +123,6 @@ def restore_postgresql_database(source="all"):

restore_database(source)


@manager.command
@manager.option(
"-r",
@@ -176,24 +175,24 @@ def get_index_data_from_database(resource="all", source="all", backup="True"):
if resource.lower() != "all" and resource.lower() != res.lower():
continue
get_insert_data_to_index(sql_st, res, data_source, clean_index)

save_key_value_buckets(
resource_table_=None,
data_source=data_source,
clean_index=clean_index,
only_values=False,
)

print("!Done for data_source: %s from %s" % (data_source, search_omero_app.config.database_connectors.keys()))
if clean_index:
clean_index = False

# validate the indexing
#test_indexing_search_query(
# source=data_source, deep_check=False, check_studies=True
#)

# backup the index data
# if backup:
# backup_elasticsearch_data()
# backup the index data
if backup:
backup_elasticsearch_data()


# set configurations
@@ -334,6 +333,12 @@ def set_no_processes(no_processes=None):


@manager.command
@manager.option(
"-d",
"--data_source",
help="data source name, the default is all", # noqa
)

@manager.option(
"-r",
"--resource",
@@ -345,15 +350,15 @@ def set_no_processes(no_processes=None):
help="creating the elastic search index if set to True", # noqa
)
@manager.option("-o", "--only_values", help="creating cached values only ")
def cache_key_value_index(resource=None, create_index=None, only_values=None):
def cache_key_value_index(resource=None, data_source='all', create_index=None, only_values=None):
"""
Cache the value bucket for each value for each resource
"""
from omero_search_engine.cache_functions.elasticsearch.transform_data import (
save_key_value_buckets,
)

save_key_value_buckets(resource, create_index, only_values)
save_key_value_buckets(resource, data_source, create_index, only_values)


@manager.command
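Given the new -d/--data_source option registered above, caching the key/value buckets can presumably be scoped to a single source; hypothetical invocations (resource and source names assumed):

python manage.py cache_key_value_index -r image -d omero1
python manage.py cache_key_value_index                     # default: all data sources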
2 changes: 2 additions & 0 deletions omero_search_engine/__init__.py
@@ -82,6 +82,7 @@ def create_app(config_name="development"):
scheme="https",
http_auth=("elastic", ELASTIC_PASSWORD),
)

search_omero_app.config.database_connectors = app_config.database_connectors
print(search_omero_app.config.database_connectors)
search_omero_app.config["es_connector"] = es_connector
@@ -104,6 +105,7 @@ def create_app(config_name="development"):

search_omero_app.logger.setLevel(logging.INFO)
search_omero_app.logger.info("app assistant startup")

return search_omero_app


9 changes: 6 additions & 3 deletions omero_search_engine/api/v1/resources/query_handler.py
@@ -459,13 +459,13 @@ def search_query(
search_omero_app.logger.info(
"-------------------------------------------------"
) # noqa
search_omero_app.logger.info("1: %s"%query)
search_omero_app.logger.info("2: %s"%main_attributes)
search_omero_app.logger.info(query)
search_omero_app.logger.info(main_attributes)
search_omero_app.logger.info(resource)
search_omero_app.logger.info(
"-------------------------------------------------"
) # noqa
search_omero_app.logger.info(("1... %s, 2....%s") % (resource, query))
search_omero_app.logger.info(("%s, %s") % (resource, query))
if not main_attributes:
q_data = {"query": {"query_details": query}}
elif resource == "image":
@@ -493,6 +493,9 @@ def search_query(
ress = search_resource_annotation(
resource, q_data.get("query"), return_containers=return_containers, data_source=data_source
)
if type(ress) is str:
return ress

ress["Error"] = "none"
return ress
except Exception as ex:
24 changes: 14 additions & 10 deletions omero_search_engine/api/v1/resources/resource_analyser.py
@@ -41,7 +41,7 @@
}}}}}}}}"""
)
key_number_search_template = Template(
"""
r"""
{
"size":0,
"query":{ "bool" : {"must": {
@@ -261,7 +261,11 @@ def get_number_of_buckets(key, data_source, res_index):
def get_all_values_for_a_key(table_, data_source, key):
res_index = resource_elasticsearchindex.get(table_)
query = key_number_search_template.substitute(key=key, data_source=data_source)
res = search_index_for_value(res_index, query)
try:
res = search_index_for_value(res_index, query)
except Exception as ex:
print("Query: %s Error: %s"%(query,str(ex)))
raise ex
number_of_buckets = (
res.get("aggregations")
.get("value_search")
@@ -333,7 +337,6 @@ def get_values_for_a_key(table_, key, data_source):
start_time = time.time()
res = search_index_for_value(res_index, query)
query_time = "%.2f" % (time.time() - start_time)
print("TIME ...", query_time)
returned_results = []
if res.get("aggregations"):
for bucket in (
@@ -889,7 +892,7 @@ def get_resource_attribute_values(
return returned_results


def get_resource_names(resource, name=None, description=False):
def get_resource_names(resource, name=None, description=False, data_source=None):
"""
return resources names attributes
It works for projects and screens but can be extended.
@@ -898,21 +901,22 @@ def get_resource_names(resource, name=None, description=False):
return build_error_message(
"This release does not support search by description."
)

if resource != "all":
returned_results = get_the_results(resource, name, description)
returned_results = get_the_results(resource, name, description, data_source)
else:
returned_results = {}
ress = ["project", "screen"]
for res in ress:
returned_results[res] = get_the_results(res, name, description)

returned_results[res] = get_the_results(res, name, description, data_source)
return returned_results


def get_the_results(resource, name, description, es_index="key_values_resource_cach"):
def get_the_results(resource, name, description, data_source, es_index="key_values_resource_cach"):
returned_results = {}
query = key_values_buckets_template_2.substitute(resource=resource)
if data_source:
query = key_values_buckets_template_with_data_source.substitute(resource=resource, data_source=data_source)
else:
query = key_values_buckets_template_2.substitute(resource=resource)
results_ = connect_elasticsearch(
es_index, query
) # .search(index=es_index, body=query)
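For illustration, calls mirroring the extended signature (the data-source name is hypothetical); passing no data_source falls back to the original template without a data-source filter:

# project names restricted to one data source (source name assumed)
names = get_resource_names("project", data_source="omero1")
# projects and screens across all sources, as before
all_names = get_resource_names("all")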
@@ -12,6 +12,11 @@ parameters:
in: query
type: string
required: false
- name: data_source
in: query
type: string
required: false
description: If provided, the search results are restricted to the specified data source; otherwise, results from all data sources are returned.
definitions:
names:
type: array
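Assuming a running instance of the search engine, the new parameter is supplied in the query string. The host, port, and route below are placeholders for illustration only, not taken from this commit:

curl "http://localhost:5577/resources/project/names/?data_source=omero1"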
23 changes: 22 additions & 1 deletion omero_search_engine/api/v1/resources/urls.py
@@ -25,6 +25,7 @@
build_error_message,
adjust_query_for_container,
get_data_sources,
check_empty_string,
)
from omero_search_engine.api.v1.resources.resource_analyser import (
search_value_for_resource,
@@ -91,6 +92,8 @@ def search_resource_page(resource_table):
pagination_dict = data.get("pagination")
return_containers = data.get("return_containers")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if return_containers:
return_containers = json.loads(return_containers.lower())

@@ -172,6 +175,8 @@ def search_resource(resource_table):
if validation_results == "OK":
return_containers = request.args.get("return_containers")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if return_containers:
return_containers = json.loads(return_containers.lower())

@@ -190,6 +195,8 @@ def get_values_using_value(resource_table):
"""
value = request.args.get("value")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if not value:
return jsonify(
build_error_message("Error: {error}".format(error="No value is provided "))
@@ -255,6 +262,8 @@ def search_values_for_a_key(resource_table):
# if it sets to true, a CSV file content will be sent instead of dict
csv = request.args.get("csv")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if csv:
try:
csv = json.loads(csv.lower())
@@ -276,6 +285,8 @@ def get_resource_keys(resource_table):
"""
mode = request.args.get("mode")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
resource_keys = get_resource_attributes(resource_table, data_source=data_source, mode=mode)
return jsonify(resource_keys)

@@ -315,14 +326,19 @@ def get_resource_names_(resource_table):

value = request.args.get("value")
description = request.args.get("use_description")
data_source = request.args.get("data_source")
data_source = check_empty_string(data_source)
if data_source:
data_source=data_source.strip(",")
data_source=json.dumps(data_source)
if description:
if description.lower() in ["true", "false"]:
description = json.loads(description.lower())
elif description == "1":
description = True
else:
description = False
return jsonify(get_resource_names(resource_table, value, description))
return jsonify(get_resource_names(resource_table, value, description, data_source))


@resources.route("/submitquery/containers/", methods=["POST"])
@@ -339,6 +355,8 @@ def submit_query_return_containers():
adjust_query_for_container(query)
return_columns = request.args.get("return_columns")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if return_columns:
try:
return_columns = json.loads(return_columns.lower())
@@ -367,6 +385,8 @@ def submit_query():
adjust_query_for_container(query)
return_columns = request.args.get("return_columns")
data_source = request.args.get("data_source")
if data_source:
data_source = data_source.strip()
if return_columns:
try:
return_columns = json.loads(return_columns.lower())
@@ -392,6 +412,7 @@ def search(resource_table):
operator = request.args.get("operator")
bookmark = request.args.get("bookmark")
data_source = request.args.get("data_source")
data_source = check_empty_string(data_source)
return_containers = request.args.get("return_containers")
if return_containers:
return_containers = json.loads(return_containers.lower())
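The strip-if-present handling of data_source recurs in nearly every handler in this file; a small helper, sketched below (not part of this commit, purely illustrative), could consolidate it:

def normalized_data_source(request):
    # return the data_source query argument with surrounding whitespace removed,
    # or None/empty as-is when the argument was not supplied
    data_source = request.args.get("data_source")
    if data_source:
        data_source = data_source.strip()
    return data_source

Each handler could then start with data_source = normalized_data_source(request).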