From 17389f0cc2774316746fdc3c5ef85e1d46525b4d Mon Sep 17 00:00:00 2001
From: conggguan
Date: Thu, 6 Jun 2024 17:04:54 +0800
Subject: [PATCH] Add doc for neural-sparse-query-two-phase-processor.

Signed-off-by: conggguan
---
 _search-plugins/neural-sparse-search.md       |  5 ++
 ...neural-sparse-query-two-phase-processor.md | 82 +++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 _search-plugins/search-pipelines/neural-sparse-query-two-phase-processor.md

diff --git a/_search-plugins/neural-sparse-search.md b/_search-plugins/neural-sparse-search.md
index fd86b3f6b01..585bd8d6c0b 100644
--- a/_search-plugins/neural-sparse-search.md
+++ b/_search-plugins/neural-sparse-search.md
@@ -30,6 +30,7 @@ To use neural sparse search, follow these steps:
 1. [Create an index for ingestion](#step-2-create-an-index-for-ingestion).
 1. [Ingest documents into the index](#step-3-ingest-documents-into-the-index).
 1. [Search the index using neural search](#step-4-search-the-index-using-neural-sparse-search).
+1. [Create and enable the two-phase processor (Optional)](#step-5-create-and-enable-the-two-phase-processor-optional).
 
 ## Step 1: Create an ingest pipeline
@@ -261,6 +262,10 @@ GET my-nlp-index/_search
 }
 }
 ```
+## Step 5: Create and enable the two-phase processor (Optional)
+
+The `neural_sparse_two_phase_processor` is a search request processor introduced in OpenSearch 2.15. It can significantly reduce neural sparse query latency with negligible accuracy loss. For more information, see [Neural sparse query two-phase processor]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/neural-sparse-query-two-phase-processor/).
+
 ## Setting a default model on an index or field

diff --git a/_search-plugins/search-pipelines/neural-sparse-query-two-phase-processor.md b/_search-plugins/search-pipelines/neural-sparse-query-two-phase-processor.md
new file mode 100644
index 00000000000..04cfb61d720
--- /dev/null
+++ b/_search-plugins/search-pipelines/neural-sparse-query-two-phase-processor.md
@@ -0,0 +1,82 @@
---
layout: default
title: Neural sparse query two-phase processor
nav_order: 13
has_children: false
parent: Search processors
grand_parent: Search pipelines
---

# Neural sparse query two-phase processor

The `neural_sparse_two_phase_processor` search request processor creates a speed-up pipeline for [neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/). It accelerates neural sparse queries by splitting the original approach of scoring all documents with all query tokens into two phases. In the first phase, only the high-weight tokens are used to score the documents and select the top candidates; in the second phase, the low-weight tokens are used to fine-tune the scores of those top candidates.

## Request fields

The following table lists all available request fields.

Field | Data type | Description
:--- | :--- | :---
`enabled` | Boolean | Whether two-phase processing is enabled. Default is `true`.
`two_phase_parameter` | Object | A map of key-value pairs representing the two-phase parameters and their associated values. You can specify `prune_ratio`, `expansion_rate`, `max_window_size`, or any combination of these three parameters. Optional.
`two_phase_parameter.prune_ratio` | Float | The ratio used to split the query tokens into high-weight and low-weight tokens. The splitting threshold is the highest token score multiplied by `prune_ratio`; for example, if the highest token score is 5.0 and `prune_ratio` is 0.4, tokens scoring at least 2.0 are treated as high-weight tokens. Valid range is [0, 1]. Default is `0.4`.
`two_phase_parameter.expansion_rate` | Float | The rate that determines how many documents are fine-tuned during the second phase. The number of second-phase documents equals the query `size` (default `10`) multiplied by `expansion_rate`. Valid values are greater than `1.0`. Default is `5.0`.
`two_phase_parameter.max_window_size` | Integer | The maximum number of documents that can be fine-tuned during the second phase. Valid values are greater than `50`. Default is `10000`.
`tag` | String | The processor's identifier. Optional.
`description` | String | A description of the processor. Optional.

## Example

### Create search pipeline

The following example request creates a search pipeline with a `neural_sparse_two_phase_processor` search request processor. The processor is enabled and overrides the default two-phase parameter values with example values; replace them with values appropriate for your data:

```json
PUT /_search/pipeline/two_phase_search_pipeline
{
  "request_processors": [
    {
      "neural_sparse_two_phase_processor": {
        "tag": "neural-sparse",
        "description": "Creates a two-phase processor.",
        "enabled": true,
        "two_phase_parameter": {
          "prune_ratio": 0.3,
          "expansion_rate": 4.0,
          "max_window_size": 15000
        }
      }
    }
  ]
}
```
{% include copy-curl.html %}

### Set search pipeline

Next, set the `index.search.default_pipeline` setting of the index on which you want to run two-phase queries to the pipeline name:

```json
PUT /index-name/_settings
{
  "index.search.default_pipeline" : "two_phase_search_pipeline"
}
```
{% include copy-curl.html %}
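After the default pipeline is set, neural sparse queries against the index are automatically executed in two phases; no changes to the query itself are required. The following is a minimal sketch of such a query, assuming an index named `my-nlp-index` with a sparse vector field named `passage_embedding` created as described in [neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/); the query text and model ID are placeholders:

```json
GET my-nlp-index/_search
{
  "query": {
    "neural_sparse": {
      "passage_embedding": {
        "query_text": "Hi world",
        "model_id": "<sparse encoding model ID>"
      }
    }
  }
}
```
{% include copy-curl.html %}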
## Limitations

### Compound query support

Of the six types of [compound queries]({{site.url}}{{site.baseurl}}/query-dsl/compound/index/), only the Boolean query is currently supported:

- [x] bool (Boolean)
- [ ] boosting
- [ ] constant_score
- [ ] dis_max (disjunction max)
- [ ] function_score
- [ ] hybrid

Note that a neural sparse query or a Boolean query with a `boost` parameter (which is not the same as a boosting query) is also supported. A sketch of a supported Boolean query is provided at the end of this page.

## Metrics

In doc-only mode, the two-phase processor reduces query latency by 20% to 50%, depending on the index configuration and the two-phase parameters.

In bi-encoder mode, the two-phase processor can reduce query latency by up to 90%, also depending on the index configuration and the two-phase parameters.
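## Example of a supported Boolean query

For reference, the following is a minimal sketch of the supported Boolean (`bool`) compound query case mentioned in the [Limitations](#limitations) section. It combines a `neural_sparse` clause with a standard `match` clause; the index name, field names, query text, and model ID are the same placeholders used in the earlier examples:

```json
GET my-nlp-index/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "neural_sparse": {
            "passage_embedding": {
              "query_text": "Hi world",
              "model_id": "<sparse encoding model ID>"
            }
          }
        },
        {
          "match": {
            "title": "world"
          }
        }
      ]
    }
  }
}
```
{% include copy-curl.html %}

In this case, the two-phase processor accelerates the `neural_sparse` clause nested inside the Boolean query.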