Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Oct 27, 2024
1 parent 5a32426 commit 7516ac1
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 65 deletions.
22 changes: 13 additions & 9 deletions configurations/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,11 @@ def set_database_connection_variables(config):
source.get("DATABASE").get("DATABASE_NAME"), DATABASE_URI
)
config.database_connectors[source.get("name")] = database_connector
elif source.get("CSV"):
csv_config={"Type":"CSV"}
config.FILES [source.get("name")]= csv_config
if source.get("CSV"). get("images_folder"):
csv_config["images_folder"]=source.get("CSV"). get("images_folder")
elif source.get("CSV"):
csv_config = {"Type": "CSV"}
config.FILES[source.get("name")] = csv_config
if source.get("CSV").get("images_folder"):
csv_config["images_folder"] = source.get("CSV").get("images_folder")
if source.get("CSV").get("projects_file"):
csv_config["projects_file"] = source.get("CSV").get("projects_file")
if source.get("CSV").get("screens_file"):
Expand Down Expand Up @@ -115,15 +115,19 @@ def update_config_file(updated_configuration, data_source=False):
with open(app_config.INSTANCE_CONFIG, "w") as f:
yaml.dump(configuration, f)


def config_datasource(configuration, updated_configuration):
changed = False
Found = False
if updated_configuration.get("CSV").get("type") =="CSV":
if updated_configuration.get("CSV").get("type") == "CSV":
for data_source in configuration.get("DATA_SOURCES"):
if data_source.get("name").lower()==updated_configuration.get("name").lower():
Found=True
if (
data_source.get("name").lower()
== updated_configuration.get("name").lower()
):
Found = True
for k, v in updated_configuration["CSV"].items():
if v=="CSV":
if v == "CSV":
continue
if data_source["CSV"].get(k) != v:
data_source["CSV"][k] = v
Expand Down
47 changes: 32 additions & 15 deletions manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ def get_index_data_from_database(resource="all", source="all", backup="True"):
if backup:
backup_elasticsearch_data()


# set configurations
@manager.command
@manager.option("-u", "--url", help="database server url")
Expand Down Expand Up @@ -248,29 +249,43 @@ def set_database_configuration(

@manager.command
@manager.option("-n", "--name", help="data source name")
@manager.option(
    "-i",
    "--images_folder",
    help="path to a folder containing csv files which contain the image data",
)
@manager.option(
    "-p", "--projects_file", help="path to a file containing the projects data"
)
@manager.option(
    "-s", "--screens_file", help="path to a file containing the screens data"
)
@manager.option("-d", "--datasource_type", help=" data source type; supports csv")
def set_data_source_files(
    name=None,
    images_folder=None,
    projects_file=None,
    screens_file=None,
    datasource_type="CSV",
):
    """
    Register (or update) a file-based data source in the configuration.

    Builds a dict of the form
    ``{"name": <name>, "CSV": {"type": ..., "images_folder": ..., ...}}``
    containing only the attributes that were supplied, and hands it to
    ``update_config_file`` with the data-source flag set.

    :param name: data source name; required, the command prints a message
        and returns without changing anything when it is missing
    :param images_folder: folder holding the csv files with the image data
    :param projects_file: file holding the projects data
    :param screens_file: file holding the screens data
    :param datasource_type: data source type, stored under the "type" key
    """
    if not name:
        print("Source name attribute is missing")
        return

    # The help text advertises "csv", while the data-source update logic
    # compares the stored type against the literal "CSV"; normalise the case
    # so that both spellings are accepted.
    source_attrs = {"type": (datasource_type or "CSV").upper()}

    optional_attrs = (
        ("images_folder", images_folder),
        ("projects_file", projects_file),
        ("screens_file", screens_file),
    )
    for attr_name, attr_value in optional_attrs:
        # Only record the attributes the caller provided.
        if attr_value:
            source_attrs[attr_name] = attr_value

    source = {"name": name, "CSV": source_attrs}
    update_config_file(source, True)


@manager.command
@manager.option("-e", "--elasticsearch_url", help="elasticsearch url")
def set_elasticsearch_configuration(elasticsearch_url=None):
Expand Down Expand Up @@ -504,6 +519,7 @@ def test_container_key_value():

check_container_keys_vakues()


@manager.command
@manager.option(
"-s",
Expand All @@ -523,8 +539,9 @@ def test_container_key_value():
def get_index_data_from_csv_files(source=None, folder=None, resource="image"):
from omero_search_engine.cache_functions.elasticsearch.transform_data import (
insert_resource_data,
save_key_value_buckets
save_key_value_buckets,
)

insert_resource_data(
folder=folder, resource=resource, data_source=source, from_json=False
)
Expand Down
16 changes: 9 additions & 7 deletions omero_search_engine/api/v1/resources/resource_analyser.py
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,7 @@ def get_the_results(
es_index, query
) # .search(index=es_index, body=query)
hits = results_["hits"]["hits"]
print ("===>>> Hist %s"%hits)
print("===>>> Hist %s" % hits)

if len(hits) > 0:
for hit in hits:
Expand Down Expand Up @@ -892,7 +892,6 @@ def get_the_results(
return returned_results



def get_container_values_for_key(
table_, container_name, csv, ret_data_source=None, key=None
):
Expand Down Expand Up @@ -1027,15 +1026,15 @@ def process_container_query(table_, attribute_name, container_id, key, resourse)

"""
Get all the keys bucket"""
container_project_keys_template = Template(
container_project_keys_template = Template(
"""
{"keys_search": {"nested": {"path": "key_values"},
"aggs": {"required_values": {"cardinality": {"field": "key_values.name.keynamenormalize","precision_threshold": 4000,
},},"uniquesTerms": {"terms": {"field": "key_values.name.keynamenormalize", "size": 10000}},},}}
"""
)
resource_keys_template= Template(
'''
resource_keys_template = Template(
"""
{
"size":0,
"query":{ "bool" : {"must": {
Expand Down Expand Up @@ -1064,11 +1063,14 @@ def process_container_query(table_, attribute_name, container_id, key, resourse)
}
}
}
'''
"""
)


def get_resource_keys(resource, data_source):
    """
    Return the key buckets of a resource for the given data source.

    Renders ``resource_keys_template`` with ``data_source``, runs the
    resulting query against the Elasticsearch index mapped to ``resource``
    and returns the ``buckets`` list found under
    ``aggregations.value_search.required_name`` in the response.

    :param resource: resource name, looked up in ``resource_elasticsearchindex``
    :param data_source: value substituted into the query template
    :return: the aggregation buckets from the search response
    """
    res_index = resource_elasticsearchindex.get(resource)
    # Render the template once and issue a single search request.
    query = json.loads(resource_keys_template.substitute(data_source=data_source))
    res = search_index_for_value(res_index, query)
    return res["aggregations"]["value_search"]["required_name"]["buckets"]
40 changes: 22 additions & 18 deletions omero_search_engine/api/v1/resources/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,40 +1350,43 @@ def check_empty_string(string_to_check):


def get_all_index_data(res_table, data_source):
    """
    Fetch every record of a resource for the given data source.

    Sends a query with empty ``and_filters`` and ``or_filters`` lists to
    ``search_resource_annotation`` so that no filtering is applied.

    :param res_table: resource passed through to ``search_resource_annotation``
    :param data_source: data source the search is restricted to
    :return: whatever ``search_resource_annotation`` returns
    """
    # Empty filter lists: the query places no constraint on the records.
    query_return_all_data = {
        "query_details": {
            "and_filters": [],
            "or_filters": [],
            "case_sensitive": False,
        }
    }
    return search_resource_annotation(
        res_table,
        query_return_all_data,
        return_containers=False,
        data_source=data_source,
    )


##################
def get_number_image_inside_container(resource, res_id, data_source):
and_filters = []
main_attributes = {
"and_main_attributes": [
{
"name": "%s_id" % resource,
"value": res_id,
"operator": "equals",
"resource": "image",
},
{
"name": "data_source",
"value": data_source,
"operator": "equals",
"resource": "image",
},
]
"and_main_attributes": [
{
"name": "%s_id" % resource,
"value": res_id,
"operator": "equals",
"resource": "image",
},
{
"name": "data_source",
"value": data_source,
"operator": "equals",
"resource": "image",
},
]
}
or_filters = []
query = {"and_filters": and_filters, "or_filters": or_filters}

query_data = {
"query_details": query,
"main_attributes": main_attributes,
"query_details": query,
"main_attributes": main_attributes,
}

returned_results = search_resource_annotation("image", query_data)
Expand All @@ -1394,4 +1397,5 @@ def get_number_image_inside_container(resource, res_id, data_source):
searchengine_results = 0
return searchengine_results


#####################
40 changes: 24 additions & 16 deletions omero_search_engine/cache_functions/elasticsearch/transform_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,10 @@ def insert_resource_data(folder, resource, data_source, from_json):
finally:
pool.close()


total_process = 0


def get_insert_data_to_index(sql_st, resource, data_source, clean_index=True):
"""
- Query the postgreSQL database server and get metadata (key-value pair)
Expand Down Expand Up @@ -661,6 +664,7 @@ def insert_plate_data(folder, plate_file):
]
handle_file(file_name, es_index, cols)


def save_key_value_buckets(
resource_table_=None, data_source=None, clean_index=False, only_values=False
):
Expand Down Expand Up @@ -705,36 +709,42 @@ def save_key_value_buckets(
)
from omero_search_engine.api.v1.resources.resource_analyser import (
get_resource_keys,
get_resource_names)
from omero_search_engine.api.v1.resources.utils import get_all_index_data,get_number_image_inside_container
get_resource_names,
)
from omero_search_engine.api.v1.resources.utils import (
get_all_index_data,
get_number_image_inside_container,
)

res = get_resource_keys(resource_table, data_source)
resource_keys = [res["key"] for res in res]
# resource_keys = get_keys(resource_table, data_source)
name_results = None
if resource_table in ["project", "screen"]:
#sql = "select id, name,description from {resource}".format(
# sql = "select id, name,description from {resource}".format(
# resource=resource_table
#)
#conn = search_omero_app.config.database_connectors[data_source]
#name_result = conn.execute_query(sql)
#name_result = get_resource_names(resource=resource_table, data_source=json.dumps(data_source))
#print (name_result)
# )
# conn = search_omero_app.config.database_connectors[data_source]
# name_result = conn.execute_query(sql)
# name_result = get_resource_names(resource=resource_table, data_source=json.dumps(data_source))
# print (name_result)
# name_results = [res["name"] for res in name_results]
# Determine the number of images for each container
name_result = get_all_index_data(resource_table, data_source)
# res=get_resource_names(resource=res_tabel, data_source=json.dumps(data_source))
try:
for res in name_result["results"]["results"]:
id = res.get("id")
# if resource_table == "project":
# if resource_table == "project":
# sql_n = query_images_in_project_id.substitute(project_id=id)
#elif resource_table == "screen":
# sql_n = query_images_in_screen_id.substitute(screen_id=id)
no_images_co = get_number_image_inside_container(resource_table, id, data_source)
#no_images_co = conn.execute_query(sql_n)
# elif resource_table == "screen":
# sql_n = query_images_in_screen_id.substitute(screen_id=id)
no_images_co = get_number_image_inside_container(
resource_table, id, data_source
)
# no_images_co = conn.execute_query(sql_n)
res["no_images"] = no_images_co


name_results = [
{
"id": res["id"],
Expand Down Expand Up @@ -770,8 +780,6 @@ def save_key_value_buckets(
(key, resource_table, es_index, len(resource_keys), data_source)
)



# determine the number of processes inside the process pool
no_processors = search_omero_app.config.get("NO_PROCESSES")
if not no_processors:
Expand Down

0 comments on commit 7516ac1

Please sign in to comment.