From d21b410217a101644472de71432c0e97e123e201 Mon Sep 17 00:00:00 2001 From: Martin Gaievski Date: Tue, 12 Mar 2024 18:09:19 -0700 Subject: [PATCH 1/3] Changing hybrid search section, adding info about aggregations Signed-off-by: Martin Gaievski --- _search-plugins/hybrid-search.md | 196 +++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) diff --git a/_search-plugins/hybrid-search.md b/_search-plugins/hybrid-search.md index ebd014b0de..7e35072c7d 100644 --- a/_search-plugins/hybrid-search.md +++ b/_search-plugins/hybrid-search.md @@ -24,6 +24,7 @@ To use hybrid search, follow these steps: 1. [Ingest documents into the index](#step-3-ingest-documents-into-the-index). 1. [Configure a search pipeline](#step-4-configure-a-search-pipeline). 1. [Search the index using hybrid search](#step-5-search-the-index-using-hybrid-search). +1. [Enhance results of hybrid search](#step-6-enhance-results-of-hybrid-search). ## Step 1: Create an ingest pipeline @@ -216,3 +217,198 @@ The response contains the matching document: } } ``` + +## Step 6: Enhance results of hybrid search + +You cam combine hybrid query clause with other OpenSearch query features to enhance search experience. Following existing clauses can be used together with hybrid query: + - Aggregations + +### Aggregations +Aggregations allow you to use OpenSearch as analytics engine. For more detailed information about aggregations, see [`Aggregations`]({{site.url}}{{site.baseurl}}/aggregations/). + +Hybrid query can be combined with any aggregation that is supported by OpenSearch. Aggregation performed on sub-set of documents that is returned by hybrid query, except for [`global`]({{site.url}}{{site.baseurl}}/aggregations/bucket/global/) +aggregation that is done for all documents. + +To illustrate how aggregations can be used with hybrid query first let's create index. Aggregations typically used with a special field types, like `keyword` or `integer`. 
Following example will create index with fields of these types: + +```json +PUT /my-nlp-index +{ + "settings": { + "number_of_shards": 2 + }, + "mappings": { + "properties": { + "doc_index": { + "type": "integer" + }, + "doc_keyword": { + "type": "keyword" + }, + "category": { + "type": "keyword" + } + } + } +} +``` + +Following requests will ingest 6 documents to our new index: + +```json +POST /_bulk +{ "index": { "_index": "my-nlp-index" } } +{ "category": "permission", "doc_keyword": "workable", "doc_index": 4976, "doc_price": 100} +{ "index": { "_index": "my-nlp-index" } } +{ "category": "sister", "doc_keyword": "angry", "doc_index": 2231, "doc_price": 200 } +{ "index": { "_index": "my-nlp-index" } } +{ "category": "hair", "doc_keyword": "likeable", "doc_price": 25 } +{ "index": { "_index": "my-nlp-index" } } +{ "category": "editor", "doc_index": 9871, "doc_price": 30 } +{ "index": { "_index": "my-nlp-index" } } +{ "category": "statement", "doc_keyword": "entire", "doc_index": 8242, "doc_price": 350 } +{ "index": { "_index": "my-nlp-index" } } +{ "category": "statement", "doc_keyword": "idea", "doc_index": 5212, "doc_price": 200 } +{ "index": { "_index": "index-test" } } +{ "category": "editor", "doc_keyword": "bubble", "doc_index": 1298, "doc_price": 130 } +{ "index": { "_index": "index-test" } } +{ "category": "editor", "doc_keyword": "bubble", "doc_index": 521, "doc_price": 75 } +``` + + +The following example request combines hybrid query with min aggregation: + +```json +GET /my-nlp-index/_search?search_pipeline=nlp-search-pipeline +{ + "query": { + "hybrid": { + "queries": [ + { + "term": { + "category": "permission" + } + }, + { + "bool": { + "should": [ + { + "term": { + "category": "editor" + } + }, + { + "term": { + "category": "statement" + } + } + ] + } + } + ] + } + }, + "aggs": { + "total_price": { + "sum": { + "field": "doc_price" + } + }, + "keywords": { + "terms": { + "field": "doc_keyword", + "size": 10 + } + } + } +} +``` + +The response 
contains the matching document and results of aggregations: +```json +{ + "took": 9, + "timed_out": false, + "_shards": { + "total": 2, + "successful": 2, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 0.5, + "hits": [ + { + "_index": "my-nlp-index", + "_id": "mHRPNY4BlN82W_Ar9UMY", + "_score": 0.5, + "_source": { + "doc_price": 100, + "doc_index": 4976, + "doc_keyword": "workable", + "category": "permission" + } + }, + { + "_index": "my-nlp-index", + "_id": "m3RPNY4BlN82W_Ar9UMY", + "_score": 0.5, + "_source": { + "doc_price": 30, + "doc_index": 9871, + "category": "editor" + } + }, + { + "_index": "my-nlp-index", + "_id": "nXRPNY4BlN82W_Ar9UMY", + "_score": 0.5, + "_source": { + "doc_price": 200, + "doc_index": 5212, + "doc_keyword": "idea", + "category": "statement" + } + }, + { + "_index": "my-nlp-index", + "_id": "nHRPNY4BlN82W_Ar9UMY", + "_score": 0.5, + "_source": { + "doc_price": 350, + "doc_index": 8242, + "doc_keyword": "entire", + "category": "statement" + } + } + ] + }, + "aggregations": { + "total_price": { + "value": 680.0 + }, + "doc_keywords": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "entire", + "doc_count": 1 + }, + { + "key": "idea", + "doc_count": 1 + }, + { + "key": "workable", + "doc_count": 1 + } + ] + } + } +} +``` \ No newline at end of file From 62c5e3ad40dc74fc1d4ffb4bfd374e0bdb8c5b97 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Mon, 18 Mar 2024 15:18:06 -0400 Subject: [PATCH 2/3] Doc review Signed-off-by: Fanit Kolchina --- _search-plugins/hybrid-search.md | 282 +++++++++++++++---------------- 1 file changed, 140 insertions(+), 142 deletions(-) diff --git a/_search-plugins/hybrid-search.md b/_search-plugins/hybrid-search.md index 7e35072c7d..21f60f6bca 100644 --- a/_search-plugins/hybrid-search.md +++ b/_search-plugins/hybrid-search.md @@ -24,7 +24,7 @@ To use hybrid search, follow these steps: 1. 
[Ingest documents into the index](#step-3-ingest-documents-into-the-index). 1. [Configure a search pipeline](#step-4-configure-a-search-pipeline). 1. [Search the index using hybrid search](#step-5-search-the-index-using-hybrid-search). -1. [Enhance results of hybrid search](#step-6-enhance-results-of-hybrid-search). +1. [Enhance hybrid search results](#step-6-optional-enhance-hybrid-search-results). ## Step 1: Create an ingest pipeline @@ -218,42 +218,38 @@ The response contains the matching document: } ``` -## Step 6: Enhance results of hybrid search +## Step 6 (Optional): Enhance hybrid search results -You cam combine hybrid query clause with other OpenSearch query features to enhance search experience. Following existing clauses can be used together with hybrid query: - - Aggregations +You can enhance search results by combining a hybrid query clause with any aggregation that OpenSearch supports. Aggregations allow you to use OpenSearch as an analytics engine. For more information about aggregations, see [Aggregations]({{site.url}}{{site.baseurl}}/aggregations/). -### Aggregations -Aggregations allow you to use OpenSearch as analytics engine. For more detailed information about aggregations, see [`Aggregations`]({{site.url}}{{site.baseurl}}/aggregations/). +Most aggregations are performed on the subset of documents that is returned by a hybrid query. The only aggregation that operates on all documents is the [`global`]({{site.url}}{{site.baseurl}}/aggregations/bucket/global/) aggregation. -Hybrid query can be combined with any aggregation that is supported by OpenSearch. Aggregation performed on sub-set of documents that is returned by hybrid query, except for [`global`]({{site.url}}{{site.baseurl}}/aggregations/bucket/global/) -aggregation that is done for all documents. - -To illustrate how aggregations can be used with hybrid query first let's create index. Aggregations typically used with a special field types, like `keyword` or `integer`. 
Following example will create index with fields of these types: +To illustrate how aggregations can be used with hybrid query, first let's create an index. Aggregations are typically used on fields of special types, like `keyword` or `integer`. The following example creates an index with several such fields: ```json PUT /my-nlp-index { - "settings": { - "number_of_shards": 2 - }, - "mappings": { - "properties": { - "doc_index": { - "type": "integer" - }, - "doc_keyword": { - "type": "keyword" - }, - "category": { - "type": "keyword" - } - } + "settings": { + "number_of_shards": 2 + }, + "mappings": { + "properties": { + "doc_index": { + "type": "integer" + }, + "doc_keyword": { + "type": "keyword" + }, + "category": { + "type": "keyword" + } } + } } ``` +{% include copy-curl.html %} -Following requests will ingest 6 documents to our new index: +The following request ingests six documents to your new index: ```json POST /_bulk @@ -274,141 +270,143 @@ POST /_bulk { "index": { "_index": "index-test" } } { "category": "editor", "doc_keyword": "bubble", "doc_index": 521, "doc_price": 75 } ``` +{% include copy-curl.html %} - -The following example request combines hybrid query with min aggregation: +Now you can combine a hybrid query clause with `sum` and `terms` aggregations: ```json GET /my-nlp-index/_search?search_pipeline=nlp-search-pipeline { - "query": { - "hybrid": { - "queries": [ - { - "term": { - "category": "permission" - } - }, - { - "bool": { - "should": [ - { - "term": { - "category": "editor" - } - }, - { - "term": { - "category": "statement" - } - } - ] - } + "query": { + "hybrid": { + "queries": [ + { + "term": { + "category": "permission" + } + }, + { + "bool": { + "should": [ + { + "term": { + "category": "editor" + } + }, + { + "term": { + "category": "statement" } + } ] + } } + ] + } + }, + "aggs": { + "total_price": { + "sum": { + "field": "doc_price" + } }, - "aggs": { - "total_price": { - "sum": { - "field": "doc_price" - } - }, - "keywords": { - "terms": {
- "field": "doc_keyword", - "size": 10 - } - } + "keywords": { + "terms": { + "field": "doc_keyword", + "size": 10 + } } + } } ``` +{% include copy-curl.html %} + +The response contains the matching documents and the aggregation results: -The response contains the matching document and results of aggregations: ```json { - "took": 9, - "timed_out": false, - "_shards": { - "total": 2, - "successful": 2, - "skipped": 0, - "failed": 0 + "took": 9, + "timed_out": false, + "_shards": { + "total": 2, + "successful": 2, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" }, - "hits": { - "total": { - "value": 4, - "relation": "eq" - }, - "max_score": 0.5, - "hits": [ - { - "_index": "my-nlp-index", - "_id": "mHRPNY4BlN82W_Ar9UMY", - "_score": 0.5, - "_source": { - "doc_price": 100, - "doc_index": 4976, - "doc_keyword": "workable", - "category": "permission" - } - }, - { - "_index": "my-nlp-index", - "_id": "m3RPNY4BlN82W_Ar9UMY", - "_score": 0.5, - "_source": { - "doc_price": 30, - "doc_index": 9871, - "category": "editor" - } - }, - { - "_index": "my-nlp-index", - "_id": "nXRPNY4BlN82W_Ar9UMY", - "_score": 0.5, - "_source": { - "doc_price": 200, - "doc_index": 5212, - "doc_keyword": "idea", - "category": "statement" - } - }, - { - "_index": "my-nlp-index", - "_id": "nHRPNY4BlN82W_Ar9UMY", - "_score": 0.5, - "_source": { - "doc_price": 350, - "doc_index": 8242, - "doc_keyword": "entire", - "category": "statement" - } - } - ] + "max_score": 0.5, + "hits": [ + { + "_index": "my-nlp-index", + "_id": "mHRPNY4BlN82W_Ar9UMY", + "_score": 0.5, + "_source": { + "doc_price": 100, + "doc_index": 4976, + "doc_keyword": "workable", + "category": "permission" + } + }, + { + "_index": "my-nlp-index", + "_id": "m3RPNY4BlN82W_Ar9UMY", + "_score": 0.5, + "_source": { + "doc_price": 30, + "doc_index": 9871, + "category": "editor" + } + }, + { + "_index": "my-nlp-index", + "_id": "nXRPNY4BlN82W_Ar9UMY", + "_score": 0.5, + "_source": { + "doc_price": 
200, + "doc_index": 5212, + "doc_keyword": "idea", + "category": "statement" + } + }, + { + "_index": "my-nlp-index", + "_id": "nHRPNY4BlN82W_Ar9UMY", + "_score": 0.5, + "_source": { + "doc_price": 350, + "doc_index": 8242, + "doc_keyword": "entire", + "category": "statement" + } + } + ] + }, + "aggregations": { + "total_price": { + "value": 680 }, - "aggregations": { - "total_price": { - "value": 680.0 + "keywords": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "entire", + "doc_count": 1 }, - "doc_keywords": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "entire", - "doc_count": 1 - }, - { - "key": "idea", - "doc_count": 1 - }, - { - "key": "workable", - "doc_count": 1 - } - ] + { + "key": "idea", + "doc_count": 1 + }, + { + "key": "workable", + "doc_count": 1 } + ] } + } } ``` \ No newline at end of file From 1556ee313f6aa1e9f2daa1f0af378014f68ed268 Mon Sep 17 00:00:00 2001 From: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Date: Thu, 21 Mar 2024 11:15:11 -0400 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Nathan Bower Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> --- _search-plugins/hybrid-search.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_search-plugins/hybrid-search.md b/_search-plugins/hybrid-search.md index 21f60f6bca..4ec8eabadf 100644 --- a/_search-plugins/hybrid-search.md +++ b/_search-plugins/hybrid-search.md @@ -224,7 +224,7 @@ You can enhance search results by combining a hybrid query clause with any aggre Most aggregations are performed on the subset of documents that is returned by a hybrid query. The only aggregation that operates on all documents is the [`global`]({{site.url}}{{site.baseurl}}/aggregations/bucket/global/) aggregation. -To illustrate how aggregations can be used with hybrid query, first let's create an index.
Aggregations are typically used on fields of special types, like `keyword` or `integer`. The following example creates an index with several such fields: +To use aggregations with a hybrid query, first create an index. Aggregations are typically used on fields of special types, like `keyword` or `integer`. The following example creates an index with several such fields: ```json PUT /my-nlp-index @@ -249,7 +249,7 @@ PUT /my-nlp-index ``` {% include copy-curl.html %} -The following request ingests six documents to your new index: +The following request ingests six documents into your new index: ```json POST /_bulk