From 2e26151e227cfc17b36243f0a4a41b5f522df76e Mon Sep 17 00:00:00 2001
From: zhichao-aws
Date: Fri, 1 Mar 2024 10:37:36 +0800
Subject: [PATCH 1/2] deprecated max_token_score

Signed-off-by: zhichao-aws
---
 _query-dsl/specialized/neural-sparse.md | 8 +++-----
 _search-plugins/neural-sparse-search.md | 3 +--
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/_query-dsl/specialized/neural-sparse.md b/_query-dsl/specialized/neural-sparse.md
index c91c491dcf..da40626d3c 100644
--- a/_query-dsl/specialized/neural-sparse.md
+++ b/_query-dsl/specialized/neural-sparse.md
@@ -20,8 +20,7 @@ Include the following request fields in the `neural_sparse` query:
 "neural_sparse": {
   "<vector_field>": {
     "query_text": "<query_text>",
-    "model_id": "<model_id>",
-    "max_token_score": "<max_token_score>"
+    "model_id": "<model_id>"
   }
 }
 ```
@@ -32,7 +31,7 @@ Field | Data type | Required/Optional | Description
 :--- | :--- | :---
 `query_text` | String | Required | The query text from which to generate vector embeddings.
 `model_id` | String | Required | The ID of the sparse encoding model or tokenizer model that will be used to generate vector embeddings from the query text. The model must be deployed in OpenSearch before it can be used in sparse neural search. For more information, see [Using custom models within OpenSearch]({{site.url}}{{site.baseurl}}/ml-commons-plugin/using-ml-models/) and [Neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/).
-`max_token_score` | Float | Optional | The theoretical upper bound of the score for all tokens in the vocabulary (required for performance optimization). For OpenSearch-provided [pretrained sparse embedding models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/#sparse-encoding-models), we recommend setting `max_token_score` to 2 for `amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v1` and to 3.5 for `amazon/neural-sparse/opensearch-neural-sparse-encoding-v1`.
+`max_token_score` | Float | Optional | (Deprecated) The theoretical upper bound of the score for all tokens in the vocabulary (required for performance optimization). For OpenSearch-provided [pretrained sparse embedding models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/#sparse-encoding-models), we recommend setting `max_token_score` to 2 for `amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v1` and to 3.5 for `amazon/neural-sparse/opensearch-neural-sparse-encoding-v1`. This field has been deprecated from 2.12 release.
 
 #### Example request
@@ -43,8 +42,7 @@ GET my-nlp-index/_search
     "neural_sparse": {
       "passage_embedding": {
         "query_text": "Hi world",
-        "model_id": "aP2Q8ooBpBj3wT4HVS8a",
-        "max_token_score": 2
+        "model_id": "aP2Q8ooBpBj3wT4HVS8a"
       }
     }
   }
diff --git a/_search-plugins/neural-sparse-search.md b/_search-plugins/neural-sparse-search.md
index c46da172a7..31ae43991e 100644
--- a/_search-plugins/neural-sparse-search.md
+++ b/_search-plugins/neural-sparse-search.md
@@ -154,8 +154,7 @@ GET my-nlp-index/_search
     "neural_sparse": {
       "passage_embedding": {
         "query_text": "Hi world",
-        "model_id": "aP2Q8ooBpBj3wT4HVS8a",
-        "max_token_score": 2
+        "model_id": "aP2Q8ooBpBj3wT4HVS8a"
      }
    }
  }

From de3988ee6f88cea68ac06b7e030a6b8a8f665aec Mon Sep 17 00:00:00 2001
From: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com>
Date: Fri, 1 Mar 2024 09:29:50 -0500
Subject: [PATCH 2/2] Update _query-dsl/specialized/neural-sparse.md

Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com>
---
 _query-dsl/specialized/neural-sparse.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_query-dsl/specialized/neural-sparse.md b/_query-dsl/specialized/neural-sparse.md
index da40626d3c..70fcfd892c 100644
--- a/_query-dsl/specialized/neural-sparse.md
+++ b/_query-dsl/specialized/neural-sparse.md
@@ -31,7 +31,7 @@ Field | Data type | Required/Optional | Description
 :--- | :--- | :---
 `query_text` | String | Required | The query text from which to generate vector embeddings.
 `model_id` | String | Required | The ID of the sparse encoding model or tokenizer model that will be used to generate vector embeddings from the query text. The model must be deployed in OpenSearch before it can be used in sparse neural search. For more information, see [Using custom models within OpenSearch]({{site.url}}{{site.baseurl}}/ml-commons-plugin/using-ml-models/) and [Neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/).
-`max_token_score` | Float | Optional | (Deprecated) The theoretical upper bound of the score for all tokens in the vocabulary (required for performance optimization). For OpenSearch-provided [pretrained sparse embedding models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/#sparse-encoding-models), we recommend setting `max_token_score` to 2 for `amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v1` and to 3.5 for `amazon/neural-sparse/opensearch-neural-sparse-encoding-v1`. This field has been deprecated from 2.12 release.
+`max_token_score` | Float | Optional | (Deprecated) The theoretical upper bound of the score for all tokens in the vocabulary (required for performance optimization). For OpenSearch-provided [pretrained sparse embedding models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/#sparse-encoding-models), we recommend setting `max_token_score` to 2 for `amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v1` and to 3.5 for `amazon/neural-sparse/opensearch-neural-sparse-encoding-v1`. This field has been deprecated as of OpenSearch 2.12.
 
 #### Example request
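
For reference, this is roughly how the documentation's example request reads once both patches are applied. It is a minimal sketch assembled from the diff hunks above; the outer `query` wrapper sits outside the hunks shown and is assumed from the standard `_search` request body:

```json
GET my-nlp-index/_search
{
  "query": {
    "neural_sparse": {
      "passage_embedding": {
        "query_text": "Hi world",
        "model_id": "aP2Q8ooBpBj3wT4HVS8a"
      }
    }
  }
}
```

The only change from the previous example is that the deprecated `max_token_score` field is no longer passed in the query.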