From 3735481aacdcc6a115e5df005eaf99e4af0239e7 Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Wed, 27 Nov 2024 11:58:23 +0100 Subject: [PATCH] [DOCS] Add Elastic Rerank usage docs --- .../inference/service-elasticsearch.asciidoc | 31 +++++++- .../reranking/semantic-reranking.asciidoc | 14 ++-- docs/reference/search/retriever.asciidoc | 70 ++++++++++++++++++- 3 files changed, 106 insertions(+), 9 deletions(-) diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc index 0103b425faefe..f859d5fb39d16 100644 --- a/docs/reference/inference/service-elasticsearch.asciidoc +++ b/docs/reference/inference/service-elasticsearch.asciidoc @@ -119,7 +119,6 @@ include::inference-shared.asciidoc[tag=task-settings] Returns the document instead of only the index. Defaults to `true`. ===== - [discrete] [[inference-example-elasticsearch-elser]] ==== ELSER via the `elasticsearch` service @@ -150,6 +149,34 @@ PUT _inference/sparse_embedding/my-elser-model Valid values are `.elser_model_2` and `.elser_model_2_linux-x86_64`. For further details, refer to the {ml-docs}/ml-nlp-elser.html[ELSER model documentation]. +[discrete] +[[inference-example-elastic-reranker]] +==== Elastic Rerank via the `elasticsearch` service + +The following example shows how to create an {infer} endpoint called `my-elastic-rerank` to perform a `rerank` task type using the built-in Elastic Rerank cross-encoder model. + +The API request below will automatically download the Elastic Rerank model if it isn't already downloaded and then deploy the model. +Once deployed, the model can be used for semantic re-ranking with a <>. 
+ +[source,console] +------------------------------------------------------------ +PUT _inference/rerank/my-elastic-rerank +{ + "service": "elasticsearch", + "service_settings": { + "model_id": ".rerank-v1", <1> + "num_threads": 1, + "adaptive_allocations": { <2> + "enabled": true, + "min_number_of_allocations": 1, + "max_number_of_allocations": 10 + } + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The `model_id` must be the ID of the built-in Elastic Rerank model: `.rerank-v1`. +<2> {ml-docs}/ml-nlp-auto-scale.html#nlp-model-adaptive-allocations[Adaptive allocations] will be enabled with the minimum of 1 and the maximum of 10 allocations. [discrete] [[inference-example-elasticsearch]] @@ -186,7 +213,7 @@ If using the Python client, you can set the `timeout` parameter to a higher valu [discrete] [[inference-example-eland]] -==== Models uploaded by Eland via the elasticsearch service +==== Models uploaded by Eland via the `elasticsearch` service The following example shows how to create an {infer} endpoint called `my-msmarco-minilm-model` to perform a `text_embedding` task type. diff --git a/docs/reference/reranking/semantic-reranking.asciidoc b/docs/reference/reranking/semantic-reranking.asciidoc index 4ebe90e44708e..2179e07481661 100644 --- a/docs/reference/reranking/semantic-reranking.asciidoc +++ b/docs/reference/reranking/semantic-reranking.asciidoc @@ -86,12 +86,14 @@ In {es}, semantic re-rankers are implemented using the {es} <> cross-encoder model via the inference API's {es} service. +.. Integrate directly with the <> using the `rerank` task type +.. Integrate directly with the <> using the `rerank` task type +.. Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]. You'll need to use the `text_similarity` NLP task type when loading the model using Eland. Then set up an <> with the `rerank` task type. 
++ +Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third party text similarity models supported by {es} for semantic re-ranking. -** Integrate directly with the <> using the `rerank` task type -** Integrate directly with the <> using the `rerank` task type -** Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]. You'll need to use the `text_similarity` NLP task type when loading the model using Eland. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third party text similarity models supported by {es} for semantic re-ranking. -*** Then set up an <> with the `rerank` task type . *Create a `rerank` task using the <>*. The Inference API creates an inference endpoint and configures your chosen machine learning model to perform the re-ranking task. . *Define a `text_similarity_reranker` retriever in your search request*. @@ -117,7 +119,7 @@ POST _search } }, "field": "text", - "inference_id": "my-cohere-rerank-model", + "inference_id": "my-elastic-rerank", "inference_text": "How often does the moon hide the sun?", "rank_window_size": 100, "min_score": 0.5 diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 86a81f1d155d2..4a381b8c5a713 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -11,6 +11,7 @@ This allows for complex behavior to be depicted in a tree-like structure, called [TIP] ==== Refer to <> for a high level overview of the retrievers abstraction. +Refer to <> for additional examples. ==== The following retrievers are available: @@ -386,8 +387,9 @@ To use `text_similarity_reranker` you must first set up a `rerank` task using th The `rerank` task should be set up with a machine learning model that can compute text similarity. 
Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third-party text similarity models supported by {es}. -Currently you can: +You have the following options: +* Use the built-in <> cross-encoder model via the inference API's {es} service. * Integrate directly with the <> using the `rerank` task type * Integrate directly with the <> using the `rerank` task type * Upload a model to {es} with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland] using the `text_similarity` NLP task type. @@ -436,6 +438,62 @@ Note that score calculations vary depending on the model used. Applies the specified <> to the child <>. If the child retriever already specifies any filters, then this top-level filter is applied in conjuction with the filter defined in the child retriever. +[discrete] +[[text-similarity-reranker-retriever-example-elastic-rerank]] +==== Example: Elastic Rerank + +This example demonstrates how to deploy the Elastic Rerank model and use it to re-rank search results using the `text_similarity_reranker` retriever. + +Follow these steps: + +. Create an inference endpoint for the `rerank` task using the <>. ++ +[source,console] +---- +PUT _inference/rerank/my-elastic-rerank +{ + "service": "elasticsearch", + "service_settings": { + "model_id": ".rerank-v1", + "num_threads": 1, + "adaptive_allocations": { <1> + "enabled": true, + "min_number_of_allocations": 1, + "max_number_of_allocations": 10 + } + } +} +---- +// TEST[skip:uses ML] +<1> {ml-docs}/ml-nlp-auto-scale.html#nlp-model-adaptive-allocations[Adaptive allocations] will be enabled with the minimum of 1 and the maximum of 10 allocations. ++ +. Define a `text_similarity_reranker` retriever: +[source,console] +---- +POST _search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "match": { + "text": "How often does the moon hide the sun?" 
+ } + } + } + }, + "field": "text", + "inference_id": "my-elastic-rerank", + "inference_text": "How often does the moon hide the sun?", + "rank_window_size": 100, + "min_score": 0.5 + } + } +} +---- +// TEST[skip:uses ML] + [discrete] [[text-similarity-reranker-retriever-example-cohere]] ==== Example: Cohere Rerank @@ -680,6 +738,12 @@ GET movies/_search <1> The `rule` retriever is the outermost retriever, applying rules to the search results that were previously reranked using the `rrf` retriever. <2> The `rrf` retriever returns results from all of its sub-retrievers, and the output of the `rrf` retriever is used as input to the `rule` retriever. +[discrete] +[[retriever-common-parameters]] +=== Common usage guidelines + +[discrete] +[[retriever-size-pagination]] ==== Using `from` and `size` with a retriever tree The <> and <> @@ -688,12 +752,16 @@ parameters are provided globally as part of the general They are applied to all retrievers in a retriever tree, unless a specific retriever overrides the `size` parameter using a different parameter such as `rank_window_size`. Though, the final search hits are always limited to `size`. +[discrete] +[[retriever-aggregations]] ==== Using aggregations with a retriever tree <> are globally specified as part of a search request. The query used for an aggregation is the combination of all leaf retrievers as `should` clauses in a <>. +[discrete] +[[retriever-restrictions]] ==== Restrictions on search parameters when specifying a retriever When a retriever is specified as part of a search, the following elements are not allowed at the top-level.