From 49ba92873708fbfc0adf1f1e6338e50fd5c86221 Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Thu, 18 Jul 2024 08:41:00 +0100 Subject: [PATCH] [DOCS] Retrievers and rerankers (#110007) Co-authored-by: Adam Demjen --- docs/reference/search/retriever.asciidoc | 67 ++++++++ .../retrievers-reranking/index.asciidoc | 8 + .../retrievers-overview.asciidoc | 71 ++++---- .../semantic-reranking.asciidoc | 151 ++++++++++++++++++ .../search-your-data.asciidoc | 2 +- 5 files changed, 261 insertions(+), 38 deletions(-) create mode 100644 docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc rename docs/reference/search/search-your-data/{ => retrievers-reranking}/retrievers-overview.asciidoc (75%) create mode 100644 docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 590df272cc89e..ed39ac786880b 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -28,6 +28,9 @@ A <> that replaces the functionality of a <> that produces top documents from <>. +`text_similarity_reranker`:: +A <> that enhances search results by re-ranking documents based on semantic similarity to a specified inference text, using a machine learning model. + [[standard-retriever]] ==== Standard Retriever @@ -201,6 +204,70 @@ GET /index/_search ---- // NOTCONSOLE +[[text-similarity-reranker-retriever]] +==== Text Similarity Re-ranker Retriever + +The `text_similarity_reranker` is a type of retriever that enhances search results by re-ranking documents based on semantic similarity to a specified inference text, using a machine learning model. + +===== Prerequisites + +To use `text_similarity_reranker`, you must first set up a `rerank` task using the <>. +The `rerank` task should be set up with a machine learning model that can compute text similarity. +Currently, you can integrate directly with the Cohere Rerank endpoint using the <> task, or upload a model to {es} <>. + +===== Parameters + +`field`:: +(Required, `string`) ++ +The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the `inference_text`. + +`inference_id`:: +(Required, `string`) ++ +Unique identifier of the inference endpoint created using the {infer} API. + +`inference_text`:: +(Required, `string`) ++ +The text snippet used as the basis for similarity comparison. + +`rank_window_size`:: +(Optional, `int`) ++ +The number of top documents to consider in the re-ranking process. Defaults to `10`. + +`min_score`:: +(Optional, `float`) ++ +Sets a minimum threshold score for including documents in the re-ranked results. Documents with similarity scores below this threshold will be excluded. Note that score calculations vary depending on the model used. + +===== Restrictions + +A text similarity re-ranker retriever is a compound retriever. Child retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. + +===== Example + +[source,js] +---- +GET /index/_search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "standard": { ... 
} + } + }, + "field": "text", + "inference_id": "my-cohere-rerank-model", + "inference_text": "Most famous landmark in Paris", + "rank_window_size": 100, + "min_score": 0.5 + } +} +---- +// NOTCONSOLE + ==== Using `from` and `size` with a retriever tree The <> and <> diff --git a/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc new file mode 100644 index 0000000000000..87ed52e365370 --- /dev/null +++ b/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc @@ -0,0 +1,8 @@ +[[retrievers-reranking-overview]] +== Retrievers and reranking + +* <> +* <> + +include::retrievers-overview.asciidoc[] +include::semantic-reranking.asciidoc[] diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc similarity index 75% rename from docs/reference/search/search-your-data/retrievers-overview.asciidoc rename to docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc index 92cd085583916..99659ae76e092 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc @@ -1,7 +1,5 @@ [[retrievers-overview]] -== Retrievers - -// Will move to a top level "Retrievers and reranking" section once reranking is live +=== Retrievers preview::[] @@ -15,33 +13,32 @@ For implementation details, including notable restrictions, check out the [discrete] [[retrievers-overview-types]] -=== Retriever types +==== Retriever types Retrievers come in various types, each tailored for different search operations. The following retrievers are currently available: -* <>. -Returns top documents from a traditional https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl.html[query]. -Mimics a traditional query but in the context of a retriever framework. -This ensures backward compatibility as existing `_search` requests remain supported. -That way you can transition to the new abstraction at your own pace without mixing syntaxes. -* <>. -Returns top documents from a <>, in the context of a retriever framework. -* <>. -Combines and ranks multiple first-stage retrievers using the reciprocal rank fusion (RRF) algorithm. -Allows you to combine multiple result sets with different relevance indicators into a single result set. -An RRF retriever is a *compound retriever*, where its `filter` element is propagated to its sub retrievers. +* <>. Returns top documents from a +traditional https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl.html[query]. +Mimics a traditional query but in the context of a retriever framework. This +ensures backward compatibility as existing `_search` requests remain supported. +That way you can transition to the new abstraction at your own pace without +mixing syntaxes. +* <>. Returns top documents from a <>, +in the context of a retriever framework. +* <>. Combines and ranks multiple first-stage retrievers using +the reciprocal rank fusion (RRF) algorithm. Allows you to combine multiple result sets +with different relevance indicators into a single result set. +An RRF retriever is a *compound retriever*, where its `filter` element is +propagated to its sub retrievers. + Sub retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. 
See the <> for detailed examples and information on how to use the RRF retriever. - -[NOTE] -==== -Stay tuned for more retriever types in future releases! -==== +* <>. Used for <>. +Requires first creating a `rerank` task using the <>. [discrete] -=== What makes retrievers useful? +==== What makes retrievers useful? Here's an overview of what makes retrievers useful and how they differ from regular queries. @@ -73,7 +70,7 @@ When using compound retrievers, only the query element is allowed, which enforce [discrete] [[retrievers-overview-example]] -=== Example +==== Example The following example demonstrates how retrievers simplify the composition of queries for RRF ranking. @@ -154,25 +151,23 @@ GET example-index/_search [discrete] [[retrievers-overview-glossary]] -=== Glossary +==== Glossary Here are some important terms: -* *Retrieval Pipeline*. -Defines the entire retrieval and ranking logic to produce top hits. -* *Retriever Tree*. -A hierarchical structure that defines how retrievers interact. -* *First-stage Retriever*. -Returns an initial set of candidate documents. -* *Compound Retriever*. -Builds on one or more retrievers, enhancing document retrieval and ranking logic. -* *Combiners*. -Compound retrievers that merge top hits from multiple sub-retrievers. -//* NOT YET *Rerankers*. Special compound retrievers that reorder hits and may adjust the number of hits, with distinctions between first-stage and second-stage rerankers. +* *Retrieval Pipeline*. Defines the entire retrieval and ranking logic to +produce top hits. +* *Retriever Tree*. A hierarchical structure that defines how retrievers interact. +* *First-stage Retriever*. Returns an initial set of candidate documents. +* *Compound Retriever*. Builds on one or more retrievers, +enhancing document retrieval and ranking logic. +* *Combiners*. Compound retrievers that merge top hits +from multiple sub-retrievers. +* *Rerankers*. Special compound retrievers that reorder hits and may adjust the number of hits, with distinctions between first-stage and second-stage rerankers. [discrete] [[retrievers-overview-play-in-search]] -=== Retrievers in action +==== Retrievers in action The Search Playground builds Elasticsearch queries using the retriever abstraction. It automatically detects the fields and types in your index and builds a retriever tree based on your selections. @@ -180,7 +175,9 @@ You can use the Playground to experiment with different retriever configurations and see how they affect search results. Refer to the {kibana-ref}/playground.html[Playground documentation] for more information. -// Content coming in https://github.com/elastic/kibana/pull/182692 - +[discrete] +[[retrievers-overview-api-reference]] +==== API reference +For implementation details, including notable restrictions, check out the <> in the Search API docs. \ No newline at end of file diff --git a/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc new file mode 100644 index 0000000000000..75c06aa953302 --- /dev/null +++ b/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc @@ -0,0 +1,151 @@ +[[semantic-reranking]] +=== Semantic reranking + +preview::[] + +[TIP] +==== +This overview focuses on the high-level concepts and use cases for semantic reranking. 
For full implementation details on how to set up and use semantic reranking in {es}, see the <> in the Search API docs. +==== + +Rerankers improve the relevance of results from earlier-stage retrieval mechanisms. +_Semantic_ rerankers use machine learning models to reorder search results based on their semantic similarity to a query. + +First-stage retrievers and rankers must be very fast and efficient because they process either the entire corpus or all matching documents. +In a multi-stage pipeline, you can progressively use more computationally intensive ranking functions and techniques, as they will operate on smaller result sets at each step. +This helps avoid query latency degradation and keeps costs manageable. + +Semantic reranking requires relatively large and complex machine learning models and operates in real time in response to queries. +This technique makes sense on a small _top-k_ result set, as one of the final steps in a pipeline. +This is a powerful technique for improving search relevance that works equally well with keyword, semantic, or hybrid retrieval algorithms. + +The next sections provide more details on the benefits, use cases, and model types used for semantic reranking. +The final sections include a practical, high-level overview of how to implement <> and links to the full reference documentation. + +[discrete] +[[semantic-reranking-use-cases]] +==== Use cases + +Semantic reranking enables a variety of use cases: + +* *Lexical (BM25) retrieval results reranking* +** Enables out-of-the-box semantic search by adding a simple API call to any lexical/BM25 retrieval pipeline. +** Adds semantic search capabilities on top of existing indices without reindexing, perfect for quick improvements. +** Ideal for environments with complex existing indices. + +* *Semantic retrieval results reranking* +** Improves results from semantic retrievers that use ELSER sparse vector embeddings or dense vector embeddings by applying more powerful models. +** Adds a refinement layer on top of hybrid retrieval with <>. + +* *General applications* +** Supports automatic and transparent chunking, eliminating the need for pre-chunking at index time. +** Provides explicit control over document relevance in retrieval-augmented generation (RAG) use cases or other scenarios involving large language model (LLM) inputs. + +Now that we've outlined the value of semantic reranking, we'll explore the specific models that power this process and how they differ. + +[discrete] +[[semantic-reranking-models]] +==== Cross-encoder and bi-encoder models + +At a high level, two model types are used for semantic reranking: cross-encoders and bi-encoders. + +NOTE: In this version, {es} *only supports cross-encoders* for semantic reranking. + +* A *cross-encoder model* can be thought of as a more powerful, all-in-one solution, because it generates query-aware document representations. +It takes the query and document texts as a single, concatenated input. + +* A *bi-encoder model* takes as input either document or query text. +Document and query embeddings are computed separately, so they aren't aware of each other. +** To compute a ranking score, an external operation is required. This typically involves computing dot-product or cosine similarity between the query and document embeddings. + +In brief, cross-encoders provide high accuracy but are more resource-intensive. +Bi-encoders are faster and more cost-effective but less precise. + +In future versions, {es} will also support bi-encoders. 
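+
+To make that external scoring operation concrete, here is a minimal sketch of bi-encoder scoring and reranking (the toy NumPy vectors below stand in for embeddings a real bi-encoder would produce; this is an illustration, not an {es} API):
+
+[source,python]
+----
+import numpy as np
+
+# Toy embeddings standing in for the separately computed
+# query and document vectors of a bi-encoder model.
+query_emb = np.array([0.2, 0.7, 0.1, 0.4])
+doc_embs = np.array([
+    [0.3, 0.6, 0.0, 0.5],  # document A
+    [0.9, 0.1, 0.2, 0.0],  # document B
+])
+
+# The external operation: cosine similarity between the query
+# embedding and each document embedding.
+scores = doc_embs @ query_emb / (
+    np.linalg.norm(doc_embs, axis=1) * np.linalg.norm(query_emb)
+)
+
+# Rerank documents by descending similarity score.
+ranking = np.argsort(-scores)
+print(ranking, scores[ranking])
+----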
+ If you're interested in a more detailed analysis of the practical differences between cross-encoders and bi-encoders, expand the next section. + +.Comparing cross-encoders and bi-encoders +[%collapsible] +============== +The following is a non-exhaustive list of considerations when choosing between cross-encoders and bi-encoders for semantic reranking: + +* Because a cross-encoder model simultaneously processes both query and document texts, it can better infer their relevance, making it more effective as a reranker than a bi-encoder. +* Cross-encoder models are generally larger and more computationally intensive, resulting in higher latencies and increased computational costs. +* There are significantly fewer open-source cross-encoders, while bi-encoders offer a wide variety of sizes, languages, and other trade-offs. +* Cross-encoders can also improve the relevance of results from semantic retrievers. +For example, their ability to take word order into account can improve on dense or sparse embedding retrieval. +* When trained in tandem with specific retrievers (like lexical/BM25), cross-encoders can “correct” typical errors made by those retrievers. +* Cross-encoders output scores that are consistent across queries. +This enables you to maintain high relevance in result sets by setting a minimum score threshold for all queries. +For example, this is important when using results in a RAG workflow or if you're otherwise feeding results to LLMs. +Note that similarity scores from bi-encoder embeddings are _query-dependent_, meaning you cannot set universal cut-offs. +* Bi-encoders rerank using embeddings. You can improve your reranking latency by creating embeddings at ingest time. These embeddings can be stored for reranking without being indexed for retrieval, reducing your memory footprint. +============== + +[discrete] +[[semantic-reranking-in-es]] +==== Semantic reranking in {es} + +In {es}, semantic rerankers are implemented using the {es} <> and a <>. + +To use semantic reranking in {es}, you need to: + +. Choose a reranking model. In addition to cross-encoder models running on {es} inference nodes, external models and services are also supported for semantic reranking via the Inference API. +** This includes cross-encoder models running in https://huggingface.co/inference-endpoints[HuggingFace Inference Endpoints] and the https://cohere.com/rerank[Cohere Rerank API]. +. Create a `rerank` task using the <>. +The Inference API creates an inference endpoint and configures your chosen machine learning model to perform the reranking task. +. Define a `text_similarity_reranker` retriever in your search request. +The retriever syntax makes it simple to configure both the retrieval and reranking of search results in a single API call. + +.*Example search request* with semantic reranker +[%collapsible] +============== +The following example shows a search request that uses a semantic reranker to reorder the top-k documents based on their semantic similarity to the query. +[source,console] +---- +POST _search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "match": { + "text": "How often does the moon hide the sun?" 
+ } + } + }, + "field": "text", + "inference_id": "my-cohere-rerank-model", + "inference_text": "How often does the moon hide the sun?", + "rank_window_size": 100, + "min_score": 0.5 + } + } +} +---- +// TEST[skip:TBD] +============== + +[discrete] +[[semantic-reranking-types]] +==== Supported reranking types + +The following `text_similarity_reranker` model configuration options are available. + +*Text similarity with cross-encoder* + +This solution uses a hosted or third-party inference service that relies on a cross-encoder model. +The model receives the text fields from the _top-k_ documents, as well as the search query, and calculates scores directly, which are then used to rerank the documents. + +Used with the Cohere inference service, rolled out in 8.13, this enables semantic reranking that works out of the box. +Check out our https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/cohere/cohere-elasticsearch.ipynb[Python notebook] for using Cohere with {es}. + +[discrete] +[[semantic-reranking-learn-more]] +==== Learn more + +* Read the <> for syntax and implementation details +* Learn more about the <> abstraction +* Learn more about the Elastic <> +* Check out our https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/cohere/cohere-elasticsearch.ipynb[Python notebook] for using Cohere with {es} \ No newline at end of file diff --git a/docs/reference/search/search-your-data/search-your-data.asciidoc b/docs/reference/search/search-your-data/search-your-data.asciidoc index e1c1618410f2f..a885df2f2179e 100644 --- a/docs/reference/search/search-your-data/search-your-data.asciidoc +++ b/docs/reference/search/search-your-data/search-your-data.asciidoc @@ -45,7 +45,7 @@ results directly in the Kibana Search UI. include::search-api.asciidoc[] include::knn-search.asciidoc[] include::semantic-search.asciidoc[] -include::retrievers-overview.asciidoc[] +include::retrievers-reranking/index.asciidoc[] include::learning-to-rank.asciidoc[] include::search-across-clusters.asciidoc[] include::search-with-synonyms.asciidoc[]