From 6c673266b9d18cf087de803f62479fa174e433f8 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Sun, 1 Sep 2024 19:48:42 +0100 Subject: [PATCH] multi-source search --- .../api/v1/resources/resource_analyser.py | 77 ++++++++++++++++--- .../swagger_docs/search_for_any_value.yml | 6 ++ .../resources/swagger_docs/searchbyvalue.yml | 5 ++ .../swagger_docs/searchvaluesusingkey.yml | 10 +++ omero_search_engine/api/v1/resources/urls.py | 8 +- .../validation/results_validator.py | 6 +- 6 files changed, 94 insertions(+), 18 deletions(-) diff --git a/omero_search_engine/api/v1/resources/resource_analyser.py b/omero_search_engine/api/v1/resources/resource_analyser.py index 4191832f..61f9569c 100644 --- a/omero_search_engine/api/v1/resources/resource_analyser.py +++ b/omero_search_engine/api/v1/resources/resource_analyser.py @@ -379,6 +379,7 @@ def prepare_search_results(results, size=0): continue row = {} returned_results.append(row) + row["Data Source"]=res["data_source"] row["Key"] = res["Attribute"] row["Value"] = res["Value"] row["Number of %ss" % resource] = res.get("items_in_the_bucket") @@ -474,7 +475,7 @@ def get_key_values_return_contents(name, resource, data_source, csv): def query_cashed_bucket_part_value_keys( - name, value, resource, es_index="key_value_buckets_information" + name, value, data_source, resource, es_index="key_value_buckets_information" ): """ Search for and obtain the available values for an attribute and part of the @@ -486,9 +487,14 @@ def query_cashed_bucket_part_value_keys( if name: name = name.strip() value = adjust_value(value) + if data_source and data_source.strip() and data_source.lower() != "all": + data_source = [itm.strip().lower() for itm in data_source.split(',')] + else: + data_source = get_data_sources() + if resource != "all": query = key_part_values_buckets_template.substitute( - name=name, value=value, resource=resource + name=name, value=value, resource=resource, data_source=json.dumps(data_source) ) res = 
search_index_for_values_get_all_buckets(es_index, query) returned_results = prepare_search_results_buckets(res) @@ -501,7 +507,7 @@ def query_cashed_bucket_part_value_keys( if table == "image1": continue query = key_part_values_buckets_template.substitute( - name=name, value=value, resource=table + name=name, value=value, resource=table, data_source=json.dumps(data_source) ) res = search_index_for_values_get_all_buckets(es_index, query) returned_results[table] = prepare_search_results_buckets(res) @@ -512,6 +518,11 @@ def query_cashed_bucket( name, resource, data_source, es_index="key_value_buckets_information" ): # returns possible matches for a specific resource + if data_source and data_source.strip() and data_source.lower() != "all": + data_source = [itm.strip().lower() for itm in data_source.split(',')] + else: + data_source =get_data_sources() + if name: name = name.strip() if resource != "all": @@ -527,7 +538,7 @@ def query_cashed_bucket( returned_results = {} for table in resource_elasticsearchindex: query = key_values_buckets_template.substitute( - name=name, resource=table, data_source=json.dumps([data_source]) + name=name, resource=table, data_source=json.dumps(data_source) ) res = search_index_for_values_get_all_buckets(es_index, query) returned_results[table] = prepare_search_results_buckets(res) @@ -549,6 +560,11 @@ def search_value_for_resource( """ value = adjust_value(value) + if data_source and data_source.lower() != "all": + data_source = [itm.strip().lower() for itm in data_source.split(',')] + else: + data_source=get_data_sources() + if table_ != "all": query = resource_key_values_buckets_template.substitute( value=value, resource=table_, data_source=json.dumps(data_source) @@ -606,16 +622,53 @@ def search_value_for_resource( """ key_part_values_buckets_template = Template( """ -{"query":{"bool":{"must":[{"bool":{ -"must":[{"match":{"Attribute.keyrnamenormalize":"$name"}}, -{"wildcard":{"Value.keyvaluenormalize":"*$value*"}} -] -}},{ -"bool": 
{"must": [ -{"match":{"resource.keyresource": "$resource"}} -]}}]}}}""" +{ + "query":{ + "bool":{ + "must":[ + { + "bool":{ + "must":[ + { + "match":{ + "Attribute.keyrnamenormalize":"$name" + } + }, + { + "wildcard":{ + "Value.keyvaluenormalize":"*$value*" + } + } + ] + } + }, + { + "bool":{ + "must":[ + { + "match":{ + "resource.keyresource":"$resource" + } + } + ] + } + }, + { + "bool":{ + "must":{ + "terms":{ + "data_source.keyvalue":$data_source + } + } + } + } + ] + } + } +}""" ) + # "fields": ["Attribute","Value","items_in_the_bucket", # "total_items_in_saved_buckets","total_buckets","total_items"], # "_source": false, diff --git a/omero_search_engine/api/v1/resources/swagger_docs/search_for_any_value.yml b/omero_search_engine/api/v1/resources/swagger_docs/search_for_any_value.yml index 4c30fce6..783ab177 100644 --- a/omero_search_engine/api/v1/resources/swagger_docs/search_for_any_value.yml +++ b/omero_search_engine/api/v1/resources/swagger_docs/search_for_any_value.yml @@ -24,6 +24,12 @@ parameters: in: query type: string required: false + - name: data_source + in: query + type: string + required: false + description: Comma-separated list of data source names (case-insensitive) used to restrict the search. If it is omitted or set to 'all', the results from all the data sources are returned + definitions: data: type: object diff --git a/omero_search_engine/api/v1/resources/swagger_docs/searchbyvalue.yml b/omero_search_engine/api/v1/resources/swagger_docs/searchbyvalue.yml index 87e09c7e..344e848b 100644 --- a/omero_search_engine/api/v1/resources/swagger_docs/searchbyvalue.yml +++ b/omero_search_engine/api/v1/resources/swagger_docs/searchbyvalue.yml @@ -32,6 +32,11 @@ parameters: description: bookmark is used to the call the next page if number of results is bigger than 1000, it returns with each reasult page. 
in: query type: integer + - name: data_source + in: query + type: string + required: false + description: If it is provided, it will return the search results for a specific data source, otherwise it will return the results from all the data sources - name: return_containers in: query type: boolean diff --git a/omero_search_engine/api/v1/resources/swagger_docs/searchvaluesusingkey.yml b/omero_search_engine/api/v1/resources/swagger_docs/searchvaluesusingkey.yml index 37488b86..ae9d7c86 100644 --- a/omero_search_engine/api/v1/resources/swagger_docs/searchvaluesusingkey.yml +++ b/omero_search_engine/api/v1/resources/swagger_docs/searchvaluesusingkey.yml @@ -47,6 +47,16 @@ parameters: in: query type: string required: true + - name: data_source + in: query + type: string + required: false + description: If it is provided, it will return the search results for a specific data source, otherwise it will return the results from all the data sources + - name: data_source + in: query + type: string + required: false + description: If it is provided, it will return the search results for a specific data source, otherwise it will return the results from all the data sources - name: csv description: a flag to return a CSV file which is created on the fly instead of JSON in: query diff --git a/omero_search_engine/api/v1/resources/urls.py b/omero_search_engine/api/v1/resources/urls.py index cb6d039b..d2966d7f 100644 --- a/omero_search_engine/api/v1/resources/urls.py +++ b/omero_search_engine/api/v1/resources/urls.py @@ -189,6 +189,7 @@ def get_values_using_value(resource_table): file: swagger_docs/search_for_any_value.yml """ value = request.args.get("value") + data_source = request.args.get("data_source") if not value: return jsonify( build_error_message("Error: {error}".format(error="No value is provided ")) @@ -206,7 +207,7 @@ def get_values_using_value(resource_table): if key: # If the key is provided it will restrict the search to the provided key. 
- return query_cashed_bucket_part_value_keys(key, value, resource_table) + return query_cashed_bucket_part_value_keys(key, value,data_source, resource_table) bookmark = request.args.get("bookmark") if bookmark: bookmark = bookmark.split(",") @@ -237,7 +238,7 @@ def get_values_using_value(resource_table): ) ) ) - return jsonify(search_value_for_resource(resource_table, value, bookmark)) + return jsonify(search_value_for_resource(resource_table, value, data_source, bookmark)) @resources.route("//searchvaluesusingkey/", methods=["GET"]) @@ -253,13 +254,14 @@ def search_values_for_a_key(resource_table): # default is false # if it sets to true, a CSV file content will be sent instead of dict csv = request.args.get("csv") + data_source = request.args.get("data_source") if csv: try: csv = json.loads(csv.lower()) except Exception: csv = False - return get_key_values_return_contents(key, resource_table, csv) + return get_key_values_return_contents(key, resource_table,data_source, csv) # getannotationkeys==> keys diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index 2b2a3b6f..f9b10954 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -295,13 +295,13 @@ def get_results_searchengine(self, operator=None): if self.type == "buckets": if self.name: res = get_key_values_return_contents( - self.name, "image", data_source=[self.data_source], csv=False + self.name, "image", data_source=self.data_source, csv=False ) self.searchengine_results = json.loads(res.data) elif self.value: self.searchengine_results = search_value_for_resource( - "image", self.value, [self.data_source] + "image", self.value, self.data_source ) return @@ -892,7 +892,7 @@ def get_omero_stats(): for name in names: if name == "name": continue - returned_results = query_cashed_bucket(name, resource, [data_source]) + returned_results = query_cashed_bucket(name, resource, 
data_source) if resource == "image": data.append( "%s, %s, %s,%s,%s"