Skip to content

Commit

Permalink
multi-source search
Browse files Browse the repository at this point in the history
  • Loading branch information
khaledk2 committed Sep 1, 2024
1 parent 4ea3357 commit 6c67326
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 18 deletions.
77 changes: 65 additions & 12 deletions omero_search_engine/api/v1/resources/resource_analyser.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ def prepare_search_results(results, size=0):
continue
row = {}
returned_results.append(row)
row["Data Source"]=res["data_source"]
row["Key"] = res["Attribute"]
row["Value"] = res["Value"]
row["Number of %ss" % resource] = res.get("items_in_the_bucket")
Expand Down Expand Up @@ -474,7 +475,7 @@ def get_key_values_return_contents(name, resource, data_source, csv):


def query_cashed_bucket_part_value_keys(
name, value, resource, es_index="key_value_buckets_information"
name, value, data_source, resource, es_index="key_value_buckets_information"
):
"""
Search for and obtain the available values for an attribute and part of the
Expand All @@ -486,9 +487,14 @@ def query_cashed_bucket_part_value_keys(
if name:
name = name.strip()
value = adjust_value(value)
if data_source and data_source.strip() and data_source.lower() != "all":
data_source = [itm.strip().lower() for itm in data_source.split(',')]
else:
data_source = get_data_sources()

if resource != "all":
query = key_part_values_buckets_template.substitute(
name=name, value=value, resource=resource
name=name, value=value, resource=resource, data_source=json.dumps(data_source)
)
res = search_index_for_values_get_all_buckets(es_index, query)
returned_results = prepare_search_results_buckets(res)
Expand All @@ -501,7 +507,7 @@ def query_cashed_bucket_part_value_keys(
if table == "image1":
continue
query = key_part_values_buckets_template.substitute(
name=name, value=value, resource=table
name=name, value=value, resource=table, data_source=json.dumps(data_source)
)
res = search_index_for_values_get_all_buckets(es_index, query)
returned_results[table] = prepare_search_results_buckets(res)
Expand All @@ -512,6 +518,11 @@ def query_cashed_bucket(
name, resource, data_source, es_index="key_value_buckets_information"
):
# returns possible matches for a specific resource
if data_source and data_source.strip() and data_source.lower() != "all":
data_source = [itm.strip().lower() for itm in data_source.split(',')]
else:
data_source =get_data_sources()

if name:
name = name.strip()
if resource != "all":
Expand All @@ -527,7 +538,7 @@ def query_cashed_bucket(
returned_results = {}
for table in resource_elasticsearchindex:
query = key_values_buckets_template.substitute(
name=name, resource=table, data_source=json.dumps([data_source])
name=name, resource=table, data_source=json.dumps(data_source)
)
res = search_index_for_values_get_all_buckets(es_index, query)
returned_results[table] = prepare_search_results_buckets(res)
Expand All @@ -549,6 +560,11 @@ def search_value_for_resource(
"""
value = adjust_value(value)

if data_source and data_source.lower() != "all":
data_source = [itm.strip().lower() for itm in data_source.split(',')]
else:
data_source=get_data_sources()

if table_ != "all":
query = resource_key_values_buckets_template.substitute(
value=value, resource=table_, data_source=json.dumps(data_source)
Expand Down Expand Up @@ -606,16 +622,53 @@ def search_value_for_resource(
"""
key_part_values_buckets_template = Template(
"""
{"query":{"bool":{"must":[{"bool":{
"must":[{"match":{"Attribute.keyrnamenormalize":"$name"}},
{"wildcard":{"Value.keyvaluenormalize":"*$value*"}}
]
}},{
"bool": {"must": [
{"match":{"resource.keyresource": "$resource"}}
]}}]}}}"""
{
"query":{
"bool":{
"must":[
{
"bool":{
"must":[
{
"match":{
"Attribute.keyrnamenormalize":"$name"
}
},
{
"wildcard":{
"Value.keyvaluenormalize":"*$value*"
}
}
]
}
},
{
"bool":{
"must":[
{
"match":{
"resource.keyresource":"$resource"
}
}
]
}
},
{
"bool":{
"must":{
"terms":{
"data_source.keyvalue":$data_source
}
}
}
}
]
}
}
}"""
)


# "fields": ["Attribute","Value","items_in_the_bucket",
# "total_items_in_saved_buckets","total_buckets","total_items"],
# "_source": false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ parameters:
in: query
type: string
required: false
- name: data_source
in: query
type: string
required: false
description: If it is provided, it will return the search results for a specific data source; otherwise, it will return the results from all the data sources

definitions:
data:
type: object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ parameters:
description: bookmark is used to call the next page if the number of results is bigger than 1000; it is returned with each result page.
in: query
type: integer
- name: data_source
in: query
type: string
required: false
description: If it is provided, it will return the search results for a specific data source; otherwise, it will return the results from all the data sources
- name: return_containers
in: query
type: boolean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,16 @@ parameters:
in: query
type: string
required: true
- name: data_source
in: query
type: string
required: false
description: If it is provided, it will return the search results for a specific data source; otherwise, it will return the results from all the data sources
- name: data_source
in: query
type: string
required: false
description: If it is provided, it will return the search results for a specific data source; otherwise, it will return the results from all the data sources
- name: csv
description: a flag to return a CSV file which is created on the fly instead of JSON
in: query
Expand Down
8 changes: 5 additions & 3 deletions omero_search_engine/api/v1/resources/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ def get_values_using_value(resource_table):
file: swagger_docs/search_for_any_value.yml
"""
value = request.args.get("value")
data_source = request.args.get("data_source")
if not value:
return jsonify(
build_error_message("Error: {error}".format(error="No value is provided "))
Expand All @@ -206,7 +207,7 @@ def get_values_using_value(resource_table):
if key:
# If the key is provided it will restrict the search to the provided key.

return query_cashed_bucket_part_value_keys(key, value, resource_table)
return query_cashed_bucket_part_value_keys(key, value,data_source, resource_table)
bookmark = request.args.get("bookmark")
if bookmark:
bookmark = bookmark.split(",")
Expand Down Expand Up @@ -237,7 +238,7 @@ def get_values_using_value(resource_table):
)
)
)
return jsonify(search_value_for_resource(resource_table, value, bookmark))
return jsonify(search_value_for_resource(resource_table, value, data_source, bookmark))


@resources.route("/<resource_table>/searchvaluesusingkey/", methods=["GET"])
Expand All @@ -253,13 +254,14 @@ def search_values_for_a_key(resource_table):
# default is false
# if it sets to true, a CSV file content will be sent instead of dict
csv = request.args.get("csv")
data_source = request.args.get("data_source")
if csv:
try:
csv = json.loads(csv.lower())
except Exception:
csv = False

return get_key_values_return_contents(key, resource_table, csv)
return get_key_values_return_contents(key, resource_table,data_source, csv)


# getannotationkeys==> keys
Expand Down
6 changes: 3 additions & 3 deletions omero_search_engine/validation/results_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,13 +295,13 @@ def get_results_searchengine(self, operator=None):
if self.type == "buckets":
if self.name:
res = get_key_values_return_contents(
self.name, "image", data_source=[self.data_source], csv=False
self.name, "image", data_source=self.data_source, csv=False
)
self.searchengine_results = json.loads(res.data)
elif self.value:

self.searchengine_results = search_value_for_resource(
"image", self.value, [self.data_source]
"image", self.value, self.data_source
)
return

Expand Down Expand Up @@ -892,7 +892,7 @@ def get_omero_stats():
for name in names:
if name == "name":
continue
returned_results = query_cashed_bucket(name, resource, [data_source])
returned_results = query_cashed_bucket(name, resource, data_source)
if resource == "image":
data.append(
"%s, %s, %s,%s,%s"
Expand Down

0 comments on commit 6c67326

Please sign in to comment.