diff --git a/docs/reference/index.asciidoc b/docs/reference/index.asciidoc
index 2057519719177..79b5f2b69f24d 100644
--- a/docs/reference/index.asciidoc
+++ b/docs/reference/index.asciidoc
@@ -32,6 +32,8 @@ include::alias.asciidoc[]
 
 include::search/search-your-data/search-your-data.asciidoc[]
 
+include::reranking/index.asciidoc[]
+
 include::query-dsl.asciidoc[]
 
 include::aggregations.asciidoc[]
diff --git a/docs/reference/reranking/index.asciidoc b/docs/reference/reranking/index.asciidoc
new file mode 100644
index 0000000000000..cc6f4a9007424
--- /dev/null
+++ b/docs/reference/reranking/index.asciidoc
@@ -0,0 +1,70 @@
+[[re-ranking-overview]]
+= Re-ranking
+
+Many search systems are built on two-stage retrieval pipelines.
+
+The first stage uses cheap, fast algorithms to find a broad set of possible matches.
+
+The second stage uses a more powerful model, often machine learning-based, to reorder the documents.
+This second step is called re-ranking.
+Because the resource-intensive model is only applied to the smaller set of pre-filtered results, this approach returns more relevant results while still optimizing for search performance and computational costs.
+
+{es} supports various ranking and re-ranking techniques to optimize search relevance and performance.
+
+[float]
+[[re-ranking-two-stage-pipeline]]
+== Two-stage retrieval pipelines
+
+[float]
+[[re-ranking-first-stage-pipeline]]
+=== First stage: initial retrieval
+
+[float]
+[[re-ranking-ranking-overview-bm25]]
+==== Full-text search: BM25 scoring
+
+{es} ranks documents based on term frequency and inverse document frequency, adjusted for document length.
+https://en.wikipedia.org/wiki/Okapi_BM25[BM25] is the default statistical scoring algorithm in {es}.
+
+[float]
+[[re-ranking-ranking-overview-vector]]
+==== Vector search: similarity scoring
+
+Vector search involves transforming data into dense or sparse vector embeddings that capture semantic meaning, and computing similarity scores between query vectors and document vectors.
+Store vectors using `semantic_text` fields for automatic inference and vectorization, or `dense_vector` and `sparse_vector` fields when you need more control over the underlying embedding model.
+Query vector fields with `semantic`, `knn`, or `sparse_vector` queries to compute similarity scores.
+Refer to <<semantic-search,semantic search>> for more information.
+
+[float]
+[[re-ranking-ranking-overview-hybrid]]
+==== Hybrid techniques
+
+Hybrid search techniques combine results from full-text and vector search pipelines.
+{es} enables combining lexical matching (BM25) and vector search scores using the <<rrf,Reciprocal Rank Fusion (RRF)>> algorithm.
+
+[float]
+[[re-ranking-overview-second-stage]]
+=== Second stage: re-ranking
+
+In the following advanced re-ranking pipelines, the first-stage retrieval mechanisms generate a set of candidate documents.
+These candidates are funneled into the re-ranker, which performs the more computationally expensive re-ranking tasks.
+
+[float]
+[[re-ranking-overview-semantic]]
+==== Semantic re-ranking
+
+<<semantic-reranking>> uses machine learning models to reorder search results based on their semantic similarity to a query.
+Models can be hosted directly in your {es} cluster, or you can use <<inference-apis,inference endpoints>> to call models provided by third-party services.
+Semantic re-ranking enables out-of-the-box semantic search capabilities on existing full-text search indices.
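+
+For example, a minimal two-stage request might look like the following sketch, which uses a `text_similarity_reranker` retriever to re-rank the top BM25 hits.
+The index, field, and `my-rerank-model` endpoint names here are placeholders, and the request assumes a `rerank` inference endpoint has already been created.
+
+[source,console]
+----
+GET my-index/_search
+{
+  "retriever": {
+    "text_similarity_reranker": {
+      "retriever": {
+        "standard": {
+          "query": {
+            "match": { "text": "How often does the moon hide the sun?" }
+          }
+        }
+      },
+      "field": "text",
+      "inference_id": "my-rerank-model",
+      "inference_text": "How often does the moon hide the sun?",
+      "rank_window_size": 100
+    }
+  }
+}
+----
+// TEST[skip:requires an inference endpoint]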
+
+[float]
+[[re-ranking-overview-ltr]]
+==== Learning to Rank (LTR)
+
+<<learning-to-rank>> is for advanced users.
+Learning To Rank involves training a machine learning model to build a ranking function for your search experience, one that you can update over time.
+LTR is best suited for scenarios where you have ample training data and need highly customized relevance tuning.
+
+include::semantic-reranking.asciidoc[]
+include::learning-to-rank.asciidoc[]
\ No newline at end of file
diff --git a/docs/reference/search/search-your-data/learning-to-rank-model-training.asciidoc b/docs/reference/reranking/learning-to-rank-model-training.asciidoc
similarity index 100%
rename from docs/reference/search/search-your-data/learning-to-rank-model-training.asciidoc
rename to docs/reference/reranking/learning-to-rank-model-training.asciidoc
diff --git a/docs/reference/search/search-your-data/learning-to-rank-search-usage.asciidoc b/docs/reference/reranking/learning-to-rank-search-usage.asciidoc
similarity index 100%
rename from docs/reference/search/search-your-data/learning-to-rank-search-usage.asciidoc
rename to docs/reference/reranking/learning-to-rank-search-usage.asciidoc
diff --git a/docs/reference/search/search-your-data/learning-to-rank.asciidoc b/docs/reference/reranking/learning-to-rank.asciidoc
similarity index 99%
rename from docs/reference/search/search-your-data/learning-to-rank.asciidoc
rename to docs/reference/reranking/learning-to-rank.asciidoc
index ebd6d67fe42da..f49f7074d2dbc 100644
--- a/docs/reference/search/search-your-data/learning-to-rank.asciidoc
+++ b/docs/reference/reranking/learning-to-rank.asciidoc
@@ -131,4 +131,4 @@ In the next pages of this guide you will learn to:
 * <<learning-to-rank-search-usage, Search using LTR model as a rescorer>>
 
 include::learning-to-rank-model-training.asciidoc[]
-include::learning-to-rank-search-usage.asciidoc[]
+include::learning-to-rank-search-usage.asciidoc[]
\ No newline at end of file
diff --git a/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc b/docs/reference/reranking/semantic-reranking.asciidoc
similarity index 72%
rename from docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc
rename to docs/reference/reranking/semantic-reranking.asciidoc
index add2d7455983e..4ebe90e44708e 100644
--- a/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc
+++ b/docs/reference/reranking/semantic-reranking.asciidoc
@@ -1,39 +1,35 @@
 [[semantic-reranking]]
-=== Semantic reranking
+== Semantic re-ranking
 
 preview::[]
 
 [TIP]
 ====
-This overview focuses more on the high-level concepts and use cases for semantic reranking. For full implementation details on how to set up and use semantic reranking in {es}, see the <<text-similarity-reranker-retriever,reference documentation>> in the Search API docs.
+This overview focuses on the high-level concepts and use cases for semantic re-ranking. For full implementation details on how to set up and use semantic re-ranking in {es}, see the <<text-similarity-reranker-retriever,reference documentation>> in the Search API docs.
 ====
 
-Rerankers improve the relevance of results from earlier-stage retrieval mechanisms.
-_Semantic_ rerankers use machine learning models to reorder search results based on their semantic similarity to a query.
+Re-rankers improve the relevance of results from earlier-stage retrieval mechanisms.
+_Semantic_ re-rankers use machine learning models to reorder search results based on their semantic similarity to a query.
 
-First-stage retrievers and rankers must be very fast and efficient because they process either the entire corpus, or all matching documents.
-In a multi-stage pipeline, you can progressively use more computationally intensive ranking functions and techniques, as they will operate on smaller result sets at each step.
-This helps avoid query latency degradation and keeps costs manageable.
-
-Semantic reranking requires relatively large and complex machine learning models and operates in real-time in response to queries.
+Semantic re-ranking requires relatively large and complex machine learning models and operates in real time in response to queries.
 This technique makes sense on a small _top-k_ result set, as one of the final steps in a pipeline.
 This is a powerful technique for improving search relevance that works equally well with keyword, semantic, or hybrid retrieval algorithms.
 
-The next sections provide more details on the benefits, use cases, and model types used for semantic reranking.
-The final sections include a practical, high-level overview of how to implement <<semantic-reranking-in-es,semantic reranking in {es}>> and links to the full reference documentation.
+The next sections provide more details on the benefits, use cases, and model types used for semantic re-ranking.
+The final sections include a practical, high-level overview of how to implement <<semantic-reranking-in-es,semantic re-ranking in {es}>> and links to the full reference documentation.
 
 [discrete]
 [[semantic-reranking-use-cases]]
-==== Use cases
+=== Use cases
 
-Semantic reranking enables a variety of use cases:
+Semantic re-ranking enables a variety of use cases:
 
-* *Lexical (BM25) retrieval results reranking*
+* *Lexical (BM25) retrieval results re-ranking*
 
 ** Out-of-the-box semantic search by adding a simple API call to any lexical/BM25 retrieval pipeline.
 ** Adds semantic search capabilities on top of existing indices without reindexing, perfect for quick improvements.
 ** Ideal for environments with complex existing indices.
 
-* *Semantic retrieval results reranking*
+* *Semantic retrieval results re-ranking*
 
 ** Improves results from semantic retrievers that use ELSER sparse vector embeddings or dense vector embeddings by applying more powerful models.
 ** Adds a refinement layer on top of hybrid retrieval with <<rrf, reciprocal rank fusion (RRF)>>.
@@ -41,15 +37,15 @@ Semantic reranking enables a variety of use cases:
 ** Supports automatic and transparent chunking, eliminating the need for pre-chunking at index time.
 ** Provides explicit control over document relevance in retrieval-augmented generation (RAG) use cases or other scenarios involving large language model (LLM) inputs.
 
-Now that we've outlined the value of semantic reranking, we'll explore the specific models that power this process and how they differ.
+Now that we've outlined the value of semantic re-ranking, we'll explore the specific models that power this process and how they differ.
 
 [discrete]
 [[semantic-reranking-models]]
-==== Cross-encoder and bi-encoder models
+=== Cross-encoder and bi-encoder models
 
-At a high level, two model types are used for semantic reranking: cross-encoders and bi-encoders.
+At a high level, two model types are used for semantic re-ranking: cross-encoders and bi-encoders.
 
-NOTE: In this version, {es} *only supports cross-encoders* for semantic reranking.
+NOTE: In this version, {es} *only supports cross-encoders* for semantic re-ranking.
 
 * A *cross-encoder model* can be thought of as a more powerful, all-in-one solution, because it generates query-aware document representations.
 It takes the query and document texts as a single, concatenated input.
@@ -66,7 +62,7 @@ If you're interested in a more detailed analysis of the practical differences be
 .Comparisons between cross-encoder and bi-encoder
 [%collapsible]
 ==============
-The following is a non-exhaustive list of considerations when choosing between cross-encoders and bi-encoders for semantic reranking:
+The following is a non-exhaustive list of considerations when choosing between cross-encoders and bi-encoders for semantic re-ranking:
 
 * Because a cross-encoder model simultaneously processes both query and document texts, it can better infer their relevance, making it more effective as a reranker than a bi-encoder.
 * Cross-encoder models are generally larger and more computationally intensive, resulting in higher latencies and increased computational costs.
@@ -78,28 +74,28 @@ For example, their ability to take word order into account can improve on dense
 This enables you to maintain high relevance in result sets, by setting a minimum score threshold for all queries.
 For example, this is important when using results in a RAG workflow or if you're otherwise feeding results to LLMs.
 Note that similarity scores from bi-encoders/embedding similarities are _query-dependent_, meaning you cannot set universal cut-offs.
-* Bi-encoders rerank using embeddings. You can improve your reranking latency by creating embeddings at ingest-time. These embeddings can be stored for reranking without being indexed for retrieval, reducing your memory footprint.
+* Bi-encoders rerank using embeddings. You can improve your re-ranking latency by creating embeddings at ingest time. These embeddings can be stored for re-ranking without being indexed for retrieval, reducing your memory footprint.
 ==============
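+
+To get a feel for what a cross-encoder does in isolation, you can call a `rerank` inference endpoint directly and score a query against a few documents.
+This sketch assumes an endpoint named `my-rerank-model` already exists; the response contains one relevance score per input document:
+
+[source,console]
+----
+POST _inference/rerank/my-rerank-model
+{
+  "query": "What is Elasticsearch?",
+  "input": [
+    "Elasticsearch is a distributed search and analytics engine.",
+    "Kibana lets you visualize your Elasticsearch data."
+  ]
+}
+----
+// TEST[skip:requires an inference endpoint]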
 
 [discrete]
 [[semantic-reranking-in-es]]
-==== Semantic reranking in {es}
+=== Semantic re-ranking in {es}
 
-In {es}, semantic rerankers are implemented using the {es} <<inference-apis,Inference API>> and a <<retriever,retriever>>.
+In {es}, semantic re-rankers are implemented using the {es} <<inference-apis,Inference API>> and a <<retriever,retriever>>.
 
-To use semantic reranking in {es}, you need to:
+To use semantic re-ranking in {es}, you need to:
 
-. *Choose a reranking model*.
+. *Choose a re-ranking model*.
 Currently you can:
 
 ** Integrate directly with the <<infer-service-cohere,Cohere Rerank inference endpoint>> using the `rerank` task type
 ** Integrate directly with the <<infer-service-google-vertex-ai,Google Vertex AI inference endpoint>> using the `rerank` task type
-** Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]. You'll need to use the `text_similarity` NLP task type when loading the model using Eland. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third party text similarity models supported by {es} for semantic reranking.
+** Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]. You'll need to use the `text_similarity` NLP task type when loading the model using Eland. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third-party text similarity models supported by {es} for semantic re-ranking.
 *** Then set up an <<inference-example-eland,{es} service inference endpoint>> with the `rerank` task type
 . *Create a `rerank` task using the <<put-inference-api,{es} Inference API>>*.
-The Inference API creates an inference endpoint and configures your chosen machine learning model to perform the reranking task.
+The Inference API creates an inference endpoint and configures your chosen machine learning model to perform the re-ranking task.
 . *Define a `text_similarity_reranker` retriever in your search request*.
-The retriever syntax makes it simple to configure both the retrieval and reranking of search results in a single API call.
+The retriever syntax makes it simple to configure both the retrieval and re-ranking of search results in a single API call.
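+
+For example, steps 1 and 2 with the Cohere service might look like the following sketch.
+The `my-rerank-model` endpoint name is a placeholder, and the `model_id` shown is just one of the available Cohere rerank models:
+
+[source,console]
+----
+PUT _inference/rerank/my-rerank-model
+{
+  "service": "cohere",
+  "service_settings": {
+    "api_key": "<api-key>",
+    "model_id": "rerank-english-v3.0"
+  }
+}
+----
+// TEST[skip:requires a Cohere API key]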
 
 .*Example search request* with semantic reranker
 [%collapsible]
 ==============
@@ -134,7 +130,7 @@ POST _search
 
 [discrete]
 [[semantic-reranking-learn-more]]
-==== Learn more
+=== Learn more
 
 * Read the <<retriever,retriever reference documentation>> for syntax and implementation details
 * Learn more about the <<retrievers-overview,retrievers>> abstraction
diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc
index 58cc8ce9ef459..6d3a1a36ad407 100644
--- a/docs/reference/search/retriever.asciidoc
+++ b/docs/reference/search/retriever.asciidoc
@@ -325,7 +325,7 @@ The `text_similarity_reranker` retriever uses an NLP model to improve search res
 
 [TIP]
 ====
-Refer to <<semantic-reranking>> for a high level overview of semantic reranking.
+Refer to <<semantic-reranking>> for a high-level overview of semantic re-ranking.
 ====
 
 ===== Prerequisites
@@ -387,7 +387,7 @@ A text similarity re-ranker retriever is a compound retriever. Child retrievers
 [[text-similarity-reranker-retriever-example-cohere]]
 ==== Example: Cohere Rerank
 
-This example enables out-of-the-box semantic search by reranking top documents using the Cohere Rerank API. This approach eliminate the need to generate and store embeddings for all indexed documents.
+This example enables out-of-the-box semantic search by re-ranking top documents using the Cohere Rerank API. This approach eliminates the need to generate and store embeddings for all indexed documents.
 This requires a <<infer-service-cohere,Cohere Rerank inference endpoint>> using the `rerank` task type.
 
 [source,js]
 ----
 GET /index/_search
@@ -418,7 +418,7 @@ GET /index/_search
 
 [discrete]
 [[text-similarity-reranker-retriever-example-eland]]
-==== Example: Semantic reranking with a Hugging Face model
+==== Example: Semantic re-ranking with a Hugging Face model
 
 The following example uses the `cross-encoder/ms-marco-MiniLM-L-6-v2` model from Hugging Face to rerank search results based on semantic similarity.
 The model must be uploaded to {es} using https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html#ml-nlp-pytorch[Eland].
@@ -428,7 +428,7 @@ The model must be uploaded to {es} using https://www.elastic.co/guide/en/elastic
 
 Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third party text similarity models supported by {es}.
 ====
 
-Follow these steps to load the model and create a semantic reranker.
+Follow these steps to load the model and create a semantic re-ranker.
 . Install Eland using `pip`
 +
diff --git a/docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc
similarity index 100%
rename from docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc
rename to docs/reference/search/search-your-data/retrievers-overview.asciidoc
diff --git a/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc
deleted file mode 100644
index 87ed52e365370..0000000000000
--- a/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc
+++ /dev/null
@@ -1,8 +0,0 @@
-[[retrievers-reranking-overview]]
-== Retrievers and reranking
-
-* <<retrievers-overview>>
-* <<semantic-reranking>>
-
-include::retrievers-overview.asciidoc[]
-include::semantic-reranking.asciidoc[]
diff --git a/docs/reference/search/search-your-data/search-api.asciidoc b/docs/reference/search/search-your-data/search-api.asciidoc
index 98c5a48b7559b..13cea537ea4fb 100644
--- a/docs/reference/search/search-your-data/search-api.asciidoc
+++ b/docs/reference/search/search-your-data/search-api.asciidoc
@@ -530,3 +530,5 @@ include::retrieve-inner-hits.asciidoc[]
 include::search-shard-routing.asciidoc[]
 include::search-using-query-rules.asciidoc[]
 include::search-template.asciidoc[]
+include::retrievers-overview.asciidoc[]
+
diff --git a/docs/reference/search/search-your-data/search-your-data.asciidoc b/docs/reference/search/search-your-data/search-your-data.asciidoc
index a885df2f2179e..cd2b418a7e79b 100644
--- a/docs/reference/search/search-your-data/search-your-data.asciidoc
+++ b/docs/reference/search/search-your-data/search-your-data.asciidoc
@@ -45,8 +45,6 @@ results directly in the Kibana Search UI.
 include::search-api.asciidoc[]
 include::knn-search.asciidoc[]
 include::semantic-search.asciidoc[]
-include::retrievers-reranking/index.asciidoc[]
-include::learning-to-rank.asciidoc[]
 include::search-across-clusters.asciidoc[]
 include::search-with-synonyms.asciidoc[]
 include::search-application-overview.asciidoc[]