From 543b3bc2daddbb1d502883c1f0005ff647aec12a Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 12 Nov 2024 00:27:14 +0200 Subject: [PATCH 01/15] iter --- docs/build.gradle | 89 ++++ .../retrievers-examples.asciidoc | 452 ++++++++++++++++-- 2 files changed, 504 insertions(+), 37 deletions(-) diff --git a/docs/build.gradle b/docs/build.gradle index e495ecacce27b..ca185a13f0580 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -1440,6 +1440,95 @@ setups['calendar_outages_addevent'] = setups['calendar_outages_addjob'] + ''' ]} ''' +// Used by the retrievers-examples + buildRestTests.setups['retrievers_examples'] = ''' + - do: + raw: + method: PUT + path: "retrievers_example" + body: | + { + "mappings": { + "properties": { + "vector": { + "type": "dense_vector", + "dims": 3, + "similarity": "l2_norm", + "index": true + }, + "text": { + "type": "text" + }, + "year": { + "type": "integer" + }, + "topic": { + "type": "keyword" + } + } + } + } + - do: + raw: + method: POST + path: "retrievers_example/_doc/1" + body: | + { + "vector": [0.23, 0.67, 0.89], + "text": "Large language models are revolutionizing information retrieval by boosting search precision, deepening contextual understanding, and reshaping user experiences in data-rich environments.", + "year": 2024, + "topic": ["llm", "ai", "information_retrieval"] + } + - do: + raw: + method: POST + path: "retrievers_example/_doc/2" + body: | + { + "vector": [0.12, 0.56, 0.78], + "text": "Artificial intelligence is transforming medicine, from advancing diagnostics and tailoring treatment plans to empowering predictive patient care for improved health outcomes.", + "year": 2023, + "topic": ["ai", "medicine"] + } + - do: + raw: + method: POST + path: "retrievers_example/_doc/3" + body: | + { + "vector": [0.45, 0.32, 0.91], + "text": "AI is redefining security by enabling advanced threat detection, proactive risk analysis, and dynamic defenses against increasingly sophisticated cyber threats.", + "year": 2024, + "topic": ["ai", "security"] + } + - do: + raw: + method: POST + path: "retrievers_example/_doc/4" + body: | + { + "vector": [0.34, 0.21, 0.98], + "text": "Elastic introduces Elastic AI Assistant, the open, generative AI sidekick powered by ESRE to democratize cybersecurity and enable users of every skill level.", + "year": 2023, + "topic": ["ai", "elastic", "assistant"] + } + - do: + raw: + method: POST + path: "retrievers_example/_doc/5" + body: | + { + "vector": [0.11, 0.65, 0.47], + "text": "Learn how to spin up a deployment of our hosted Elasticsearch Service and use Elastic Observability to gain deeper insight into the behavior of your applications and systems.", + "year": 2024, + "topic": ["documentation", "observability", "elastic"] + } + - do: + raw: + method: POST + path: "retrievers_example/_refresh" +''' + // used by median absolute deviation aggregation setups['reviews'] = ''' - do: diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index 8cd1a4bf5ce98..3c09384ee3bb8 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -10,9 +10,8 @@ To demonstrate the full functionality of retrievers, these examples require acce To begin with, we'll set up the necessary services and have them in place for later use. 
-[source,js] +[source,console] ---- -// Setup rerank task stored as `my-rerank-model` PUT _inference/rerank/my-rerank-model { "service": "cohere", @@ -25,7 +24,7 @@ PUT _inference/rerank/my-rerank-model //NOTCONSOLE Now that we have our reranking service in place, lets create the `retrievers_example` index, and add some documents to it. -[source,js] +[source,console] ---- PUT retrievers_example { @@ -49,11 +48,7 @@ PUT retrievers_example } } } ----- -//NOTCONSOLE -[source,js] ----- POST /retrievers_example/_doc/1 { "vector": [0.23, 0.67, 0.89], @@ -94,8 +89,10 @@ POST /retrievers_example/_doc/5 "topic": ["documentation", "observability", "elastic"] } +POST /retrievers_example/_refresh + ---- -//NOTCONSOLE +// NOTCONSOLE Now that we also have our documents in place, let's try to run some queries using retrievers. @@ -112,7 +109,7 @@ To implement this in the retriever framework, we start with the top-level elemen retriever. This retriever operates on top of two other retrievers: a `knn` retriever and a `standard` retriever. Our query structure would look like this: -[source,js] +[source,console] ---- GET /retrievers_example/_search { @@ -146,10 +143,51 @@ GET /retrievers_example/_search "rank_constant": 1 } }, - "_source": ["text", "topic"] + "_source": false } ---- -//NOTCONSOLE +// TEST[setup:retrievers_examples] + +Which would return the following response based on the final rrf score for each result + +[source,console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 0.8333334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "1", + "_score": 0.8333334 + }, + { + "_index": "retrievers_example", + "_id": "2", + "_score": 0.8333334 + }, + { + "_index": "retrievers_example", + "_id": "3", + "_score": 0.25 + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] [discrete] [[retrievers-examples-collapsing-retriever-results]] @@ -160,7 +198,7 @@ up using the `collapse` parameter with our retriever. This enables grouping resu any field and returns only the highest-scoring document from each group. In this example we'll collapse our results based on the `year` field. 
-[source,js] +[source,console] ---- GET /retrievers_example/_search { @@ -198,13 +236,109 @@ GET /retrievers_example/_search "field": "year", "inner_hits": { "name": "topic related documents", - "_source": ["text", "year"] + "_source": ["year"] } }, - "_source": ["text", "topic"] + "_source": false } ---- -//NOTCONSOLE +// TEST[setup:retrievers_examples] + +Which would return the following response collapsed results + +[source,console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 0.8333334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "1", + "_score": 0.8333334, + "fields": { + "year": [ + 2024 + ] + }, + "inner_hits": { + "topic related documents": { + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 0.8333334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "1", + "_score": 0.8333334, + "_source": { + "year": 2024 + } + }, + { + "_index": "retrievers_example", + "_id": "3", + "_score": 0.25, + "_source": { + "year": 2024 + } + } + ] + } + } + } + }, + { + "_index": "retrievers_example", + "_id": "2", + "_score": 0.8333334, + "fields": { + "year": [ + 2023 + ] + }, + "inner_hits": { + "topic related documents": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 0.8333334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "2", + "_score": 0.8333334, + "_source": { + "year": 2023 + } + } + ] + } + } + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] [discrete] [[retrievers-examples-text-similarity-reranker-on-top-of-rrf]] @@ -214,7 +348,7 @@ Previously, we used a `text_similarity_reranker` retriever within an `rrf` retri Because retrievers support full composability, we can also rerank the results of an `rrf` retriever. Let's apply this to our first example. -[source,js] +[source,console] ---- GET retrievers_example/_search { @@ -255,11 +389,11 @@ GET retrievers_example/_search "inference_text": "What are the state of the art applications of AI in information retrieval?" } }, - "_source": ["text", "topic"] + "_source": false } ---- -//NOTCONSOLE +// TEST[skip:no_access_to_reranker] [discrete] [[retrievers-examples-rrf-ranking-on-text-similarity-reranker-results]] @@ -269,7 +403,7 @@ For this example, we'll replace our semantic query with the `my-rerank-model` reranker we previously configured. Since this is a reranker, it needs an initial pool of documents to work with. In this case, we'll filter for documents about `ai` topics. -[source,js] +[source,console] ---- GET /retrievers_example/_search { @@ -309,13 +443,10 @@ GET /retrievers_example/_search "rank_constant": 1 } }, - "_source": [ - "text", - "topic" - ] + "_source": false } ---- -//NOTCONSOLE +// TEST[skip:no_access_to_reranker] [discrete] [[retrievers-examples-chaining-text-similarity-reranker-retrievers]] @@ -323,7 +454,7 @@ GET /retrievers_example/_search Full composability means we can chain together multiple retrievers of the same type. For instance, imagine we have a computationally expensive reranker that's specialized for AI content. We can rerank the results of a `text_similarity_reranker` using another `text_similarity_reranker` retriever. Each reranker can operate on different fields and/or use different inference services. 
-[source,js] +[source,console] ---- GET retrievers_example/_search { @@ -355,14 +486,10 @@ GET retrievers_example/_search "inference_text": "Applications of Large Language Models in technology and their impact on user satisfaction" } }, - "_source": [ - "text", - "topic" - ] + "_source": false } ---- -//NOTCONSOLE - +// TEST[skip:no_access_to_reranker] Note that our example applies two reranking steps. First, we rerank the top 100 documents from the `knn` search using the `my-rerank-model` reranker. Then we @@ -380,7 +507,7 @@ the `terms` aggregation for the `topic` field will include all results, not just from the 2 nested retrievers, i.e. all documents whose `year` field is greater than 2023, and whose `topic` field matches the term `elastic`. -[source,js] +[source,console] ---- GET retrievers_example/_search { @@ -412,10 +539,7 @@ GET retrievers_example/_search "rank_constant": 1 } }, - "_source": [ - "text", - "topic" - ], + "_source": false, "aggs": { "topics": { "terms": { @@ -425,4 +549,258 @@ GET retrievers_example/_search } } ---- -//NOTCONSOLE +// TEST[setup:retrievers_examples] + +The output of which would look like the following: +[source, console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 0.5833334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "5", + "_score": 0.5833334 + }, + { + "_index": "retrievers_example", + "_id": "1", + "_score": 0.5 + }, + { + "_index": "retrievers_example", + "_id": "4", + "_score": 0.5 + }, + { + "_index": "retrievers_example", + "_id": "3", + "_score": 0.33333334 + } + ] + }, + "aggregations": { + "topics": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "ai", + "doc_count": 3 + }, + { + "key": "elastic", + "doc_count": 2 + }, + { + "key": "assistant", + "doc_count": 1 + }, + { + "key": "documentation", + "doc_count": 1 + }, + { + "key": "information_retrieval", + "doc_count": 1 + }, + { + "key": "llm", + "doc_count": 1 + }, + { + "key": "observability", + "doc_count": 1 + }, + { + "key": "security", + "doc_count": 1 + } + ] + } + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] + + +[discrete] +[[retrievers-examples-explain-multiple-rrf]] +==== Example: Explainability with multiple retrievers +By adding `explain: true` to the request, each retriever will now provide a detailed explanation of all the steps +and calculations that took place for the final score to be computed. 
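As a quick worked example for reading these explanations: the explanation text itself reports each final score as the "sum of [1 / (rank + rankConstant)] for each query". So, with `rank_constant: 1`, a document ranked 1st by one sub-retriever and 2nd by another scores `1/(1 + 1) + 1/(2 + 1) = 0.8333334`, which is exactly the value reported for the top hits in the responses above.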
Composability is fully supported as well in the context of `explain`, and +each retriever will provide its own explanation, as we can see in the example below + +[source,console] +---- +GET /retrievers_example/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "term": { + "topic": "elastic" + } + } + } + }, + { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "_source": false, + "size": 1, + "explain": true +} +---- +// TEST[setup:retrievers_examples] + +The output of which, albeit a bit verbose, will provide all the necessary info to assist in debugging and reason with ranking +[source, console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 5, + "relation": "eq" + }, + "max_score": 0.5, + "hits": [ + { + "_shard": "[retrievers_example][0]", + "_node": "jn_rdZFKS3-UgWVsVdj2Vg", + "_index": "retrievers_example", + "_id": "1", + "_score": 0.5, + "_explanation": { + "value": 0.5, + "description": "rrf score: [0.5] computed for initial ranks [0, 1] with rankConstant: [1] as sum of [1 / (rank + rankConstant)] for each query", + "details": [ + { + "value": 0, + "description": "rrf score: [0], result not found in query at index [0]", + "details": [] + }, + { + "value": 1, + "description": "rrf score: [0.5], for rank [1] in query at index [1] computed as [1 / (1 + 1)], for matching query with score", + "details": [ + { + "value": 0.8333334, + "description": "rrf score: [0.8333334] computed for initial ranks [2, 1] with rankConstant: [1] as sum of [1 / (rank + rankConstant)] for each query", + "details": [ + { + "value": 2, + "description": "rrf score: [0.33333334], for rank [2] in query at index [0] computed as [1 / (2 + 1)], for matching query with score", + "details": [ + { + "value": 2.8129659, + "description": "sum of:", + "details": [ + { + "value": 1.4064829, + "description": "weight(text:information in 0) [PerFieldSimilarity], result of:", + "details": [ + { + ... + } + ] + }, + { + "value": 1.4064829, + "description": "weight(text:retrieval in 0) [PerFieldSimilarity], result of:", + "details": [ + { + ... 
+ } + ] + } + ] + } + ] + }, + { + "value": 1, + "description": "rrf score: [0.5], for rank [1] in query at index [1] computed as [1 / (1 + 1)], for matching query with score", + "details": [ + { + "value": 1, + "description": "doc [0] with an original score of [1.0] is at rank [1] from the following source queries.", + "details": [ + { + "value": 1, + "description": "found vector with calculated similarity: 1.0", + "details": [] + } + ] + } + ] + } + ] + } + ] + } + ] + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] +// TESTRESPONSE[s/"\.\.\."/$body.hits.hits.0._explanation.details/] From 96b188055122b77c5c6fda01a199bedccebe3ed0 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 12 Nov 2024 09:59:45 +0200 Subject: [PATCH 02/15] iter --- .../search/search-your-data/retrievers-examples.asciidoc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index 3c09384ee3bb8..0ee40d24ebd8c 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -703,7 +703,7 @@ GET /retrievers_example/_search // TEST[setup:retrievers_examples] The output of which, albeit a bit verbose, will provide all the necessary info to assist in debugging and reason with ranking -[source, console-result] +[source, js] ---- { "took": 42, @@ -723,7 +723,7 @@ The output of which, albeit a bit verbose, will provide all the necessary info t "hits": [ { "_shard": "[retrievers_example][0]", - "_node": "jn_rdZFKS3-UgWVsVdj2Vg", + "_node": "jnrdZFKS3abUgWVsVdj2Vg", "_index": "retrievers_example", "_id": "1", "_score": 0.5, @@ -802,5 +802,4 @@ The output of which, albeit a bit verbose, will provide all the necessary info t } } ---- -// TESTRESPONSE[s/"took": 42/"took": $body.took/] -// TESTRESPONSE[s/"\.\.\."/$body.hits.hits.0._explanation.details/] +// NOTCONSOLE From 8c0717e098b7fb6a863707321ed871b34fdd5aae Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Thu, 14 Nov 2024 12:06:28 +0200 Subject: [PATCH 03/15] moving tests setup within the doc page itself --- docs/build.gradle | 89 ------------------- .../retrievers-examples.asciidoc | 18 ++-- 2 files changed, 9 insertions(+), 98 deletions(-) diff --git a/docs/build.gradle b/docs/build.gradle index ca185a13f0580..e495ecacce27b 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -1440,95 +1440,6 @@ setups['calendar_outages_addevent'] = setups['calendar_outages_addjob'] + ''' ]} ''' -// Used by the retrievers-examples - buildRestTests.setups['retrievers_examples'] = ''' - - do: - raw: - method: PUT - path: "retrievers_example" - body: | - { - "mappings": { - "properties": { - "vector": { - "type": "dense_vector", - "dims": 3, - "similarity": "l2_norm", - "index": true - }, - "text": { - "type": "text" - }, - "year": { - "type": "integer" - }, - "topic": { - "type": "keyword" - } - } - } - } - - do: - raw: - method: POST - path: "retrievers_example/_doc/1" - body: | - { - "vector": [0.23, 0.67, 0.89], - "text": "Large language models are revolutionizing information retrieval by boosting search precision, deepening contextual understanding, and reshaping user experiences in data-rich environments.", - "year": 2024, - "topic": ["llm", "ai", "information_retrieval"] - } - - do: - raw: - method: POST - path: "retrievers_example/_doc/2" - body: | - { - "vector": [0.12, 0.56, 0.78], - "text": 
"Artificial intelligence is transforming medicine, from advancing diagnostics and tailoring treatment plans to empowering predictive patient care for improved health outcomes.", - "year": 2023, - "topic": ["ai", "medicine"] - } - - do: - raw: - method: POST - path: "retrievers_example/_doc/3" - body: | - { - "vector": [0.45, 0.32, 0.91], - "text": "AI is redefining security by enabling advanced threat detection, proactive risk analysis, and dynamic defenses against increasingly sophisticated cyber threats.", - "year": 2024, - "topic": ["ai", "security"] - } - - do: - raw: - method: POST - path: "retrievers_example/_doc/4" - body: | - { - "vector": [0.34, 0.21, 0.98], - "text": "Elastic introduces Elastic AI Assistant, the open, generative AI sidekick powered by ESRE to democratize cybersecurity and enable users of every skill level.", - "year": 2023, - "topic": ["ai", "elastic", "assistant"] - } - - do: - raw: - method: POST - path: "retrievers_example/_doc/5" - body: | - { - "vector": [0.11, 0.65, 0.47], - "text": "Learn how to spin up a deployment of our hosted Elasticsearch Service and use Elastic Observability to gain deeper insight into the behavior of your applications and systems.", - "year": 2024, - "topic": ["documentation", "observability", "elastic"] - } - - do: - raw: - method: POST - path: "retrievers_example/_refresh" -''' - // used by median absolute deviation aggregation setups['reviews'] = ''' - do: diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index 0ee40d24ebd8c..560ae88c33fee 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -10,7 +10,7 @@ To demonstrate the full functionality of retrievers, these examples require acce To begin with, we'll set up the necessary services and have them in place for later use. -[source,console] +[source,js] ---- PUT _inference/rerank/my-rerank-model { @@ -92,7 +92,7 @@ POST /retrievers_example/_doc/5 POST /retrievers_example/_refresh ---- -// NOTCONSOLE +// TESTSETUP Now that we also have our documents in place, let's try to run some queries using retrievers. @@ -146,7 +146,7 @@ GET /retrievers_example/_search "_source": false } ---- -// TEST[setup:retrievers_examples] +// TEST Which would return the following response based on the final rrf score for each result @@ -242,7 +242,7 @@ GET /retrievers_example/_search "_source": false } ---- -// TEST[setup:retrievers_examples] +// TEST[continued] Which would return the following response collapsed results @@ -393,7 +393,7 @@ GET retrievers_example/_search } ---- -// TEST[skip:no_access_to_reranker] +// TEST[skip: no access to reranker service] [discrete] [[retrievers-examples-rrf-ranking-on-text-similarity-reranker-results]] @@ -446,7 +446,7 @@ GET /retrievers_example/_search "_source": false } ---- -// TEST[skip:no_access_to_reranker] +// TEST[skip: no access to reranker service] [discrete] [[retrievers-examples-chaining-text-similarity-reranker-retrievers]] @@ -489,7 +489,7 @@ GET retrievers_example/_search "_source": false } ---- -// TEST[skip:no_access_to_reranker] +// TEST[skip: no access to reranker service] Note that our example applies two reranking steps. First, we rerank the top 100 documents from the `knn` search using the `my-rerank-model` reranker. 
Then we @@ -549,7 +549,7 @@ GET retrievers_example/_search } } ---- -// TEST[setup:retrievers_examples] +// TEST[continued] The output of which would look like the following: [source, console-result] @@ -700,7 +700,7 @@ GET /retrievers_example/_search "explain": true } ---- -// TEST[setup:retrievers_examples] +// TEST[continued] The output of which, albeit a bit verbose, will provide all the necessary info to assist in debugging and reason with ranking [source, js] From 89c3c35302b464e8343c505e459a63441eba22c9 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Thu, 14 Nov 2024 12:34:40 +0200 Subject: [PATCH 04/15] iter --- .../retrievers-examples.asciidoc | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index 560ae88c33fee..dd49c68164d6e 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -348,7 +348,7 @@ Previously, we used a `text_similarity_reranker` retriever within an `rrf` retri Because retrievers support full composability, we can also rerank the results of an `rrf` retriever. Let's apply this to our first example. -[source,console] +[source,js] ---- GET retrievers_example/_search { @@ -393,7 +393,7 @@ GET retrievers_example/_search } ---- -// TEST[skip: no access to reranker service] +// NOTCONSOLE [discrete] [[retrievers-examples-rrf-ranking-on-text-similarity-reranker-results]] @@ -403,7 +403,7 @@ For this example, we'll replace our semantic query with the `my-rerank-model` reranker we previously configured. Since this is a reranker, it needs an initial pool of documents to work with. In this case, we'll filter for documents about `ai` topics. -[source,console] +[source,js] ---- GET /retrievers_example/_search { @@ -446,7 +446,7 @@ GET /retrievers_example/_search "_source": false } ---- -// TEST[skip: no access to reranker service] +// NOTCONSOLE [discrete] [[retrievers-examples-chaining-text-similarity-reranker-retrievers]] @@ -454,7 +454,7 @@ GET /retrievers_example/_search Full composability means we can chain together multiple retrievers of the same type. For instance, imagine we have a computationally expensive reranker that's specialized for AI content. We can rerank the results of a `text_similarity_reranker` using another `text_similarity_reranker` retriever. Each reranker can operate on different fields and/or use different inference services. -[source,console] +[source,js] ---- GET retrievers_example/_search { @@ -489,7 +489,7 @@ GET retrievers_example/_search "_source": false } ---- -// TEST[skip: no access to reranker service] +// NOTCONSOLE Note that our example applies two reranking steps. First, we rerank the top 100 documents from the `knn` search using the `my-rerank-model` reranker. 
Then we
pick the top 10 results and rerank them using the more fine-grained
`my-other-more-expensive-rerank-model`.

From 75982fba533ec6833b3a124f0f9c8d8f495adea1 Mon Sep 17 00:00:00 2001
From: Panagiotis Bailis
Date: Thu, 14 Nov 2024 12:45:53 +0200
Subject: [PATCH 05/15] reordering examples

---
 .../retrievers-examples.asciidoc              | 354 +++++++++---------
 1 file changed, 178 insertions(+), 176 deletions(-)

diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc
index dd49c68164d6e..23d898bee3fb8 100644
--- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc
+++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc
@@ -1,29 +1,16 @@
 [[retrievers-examples]]
-=== Retrievers examples
-
-Learn how to combine different retrievers in these hands-on examples.
-To demonstrate the full functionality of retrievers, these examples require access to a <> set up using the <>.
+
+Let's work through some hands-on examples to show the different usages and functionalities that
+retrievers now offer.
+
+=== Retrievers examples

 [discrete]
 [[retrievers-examples-setup]]
 ==== Add example data

-To begin with, we'll set up the necessary services and have them in place for later use.
-
-[source,js]
-----
-PUT _inference/rerank/my-rerank-model
-{
-    "service": "cohere",
-    "service_settings": {
-        "model_id": "rerank-english-v3.0",
-        "api_key": "{{COHERE_API_KEY}}"
-    }
-}
-----
-//NOTCONSOLE
-
-Now that we have our reranking service in place, lets create the `retrievers_example` index, and add some documents to it.
+To begin with, let's create the `retrievers_example` index, and add some documents to it.
[source,console] ---- PUT retrievers_example @@ -94,7 +81,7 @@ POST /retrievers_example/_refresh ---- // TESTSETUP -Now that we also have our documents in place, let's try to run some queries using retrievers. +Now that we have our documents in place, let's try to run some queries using retrievers. [discrete] [[retrievers-examples-combining-standard-knn-retrievers-with-rrf]] @@ -340,162 +327,6 @@ Which would return the following response collapsed results ---- // TESTRESPONSE[s/"took": 42/"took": $body.took/] -[discrete] -[[retrievers-examples-text-similarity-reranker-on-top-of-rrf]] -==== Example: Rerank results of an RRF retriever - -Previously, we used a `text_similarity_reranker` retriever within an `rrf` retriever. -Because retrievers support full composability, we can also rerank the results of an -`rrf` retriever. Let's apply this to our first example. - -[source,js] ----- -GET retrievers_example/_search -{ - "retriever": { - "text_similarity_reranker": { - "retriever": { - "rrf": { - "retrievers": [ - { - "standard":{ - "query":{ - "query_string":{ - "query": "(information retrieval) OR (artificial intelligence)", - "default_field": "text" - } - } - } - }, - { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - } - ], - "rank_window_size": 10, - "rank_constant": 1 - } - }, - "field": "text", - "inference_id": "my-rerank-model", - "inference_text": "What are the state of the art applications of AI in information retrieval?" - } - }, - "_source": false -} - ----- -// NOTCONSOLE - -[discrete] -[[retrievers-examples-rrf-ranking-on-text-similarity-reranker-results]] -==== Example: RRF with semantic reranker - -For this example, we'll replace our semantic query with the `my-rerank-model` -reranker we previously configured. Since this is a reranker, it needs an initial pool of -documents to work with. In this case, we'll filter for documents about `ai` topics. - -[source,js] ----- -GET /retrievers_example/_search -{ - "retriever": { - "rrf": { - "retrievers": [ - { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - }, - { - "text_similarity_reranker": { - "retriever": { - "standard": { - "query": { - "term": { - "topic": "ai" - } - } - } - }, - "field": "text", - "inference_id": "my-rerank-model", - "inference_text": "Can I use generative AI to identify user intent and improve search relevance?" - } - } - ], - "rank_window_size": 10, - "rank_constant": 1 - } - }, - "_source": false -} ----- -// NOTCONSOLE - -[discrete] -[[retrievers-examples-chaining-text-similarity-reranker-retrievers]] -==== Example: Chaining multiple semantic rerankers - -Full composability means we can chain together multiple retrievers of the same type. For instance, imagine we have a computationally expensive reranker that's specialized for AI content. We can rerank the results of a `text_similarity_reranker` using another `text_similarity_reranker` retriever. Each reranker can operate on different fields and/or use different inference services. 
- -[source,js] ----- -GET retrievers_example/_search -{ - "retriever": { - "text_similarity_reranker": { - "retriever": { - "text_similarity_reranker": { - "retriever": { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - }, - "rank_window_size": 100, - "field": "text", - "inference_id": "my-rerank-model", - "inference_text": "What are the state of the art applications of AI in information retrieval?" - } - }, - "rank_window_size": 10, - "field": "text", - "inference_id": "my-other-more-expensive-rerank-model", - "inference_text": "Applications of Large Language Models in technology and their impact on user satisfaction" - } - }, - "_source": false -} ----- -// NOTCONSOLE - -Note that our example applies two reranking steps. First, we rerank the top 100 -documents from the `knn` search using the `my-rerank-model` reranker. Then we -pick the top 10 results and rerank them using the more fine-grained -`my-other-more-expensive-rerank-model`. - [discrete] [[retrievers-examples-rrf-and-aggregations]] ==== Example: Combine RRF with aggregations @@ -636,7 +467,6 @@ The output of which would look like the following: ---- // TESTRESPONSE[s/"took": 42/"took": $body.took/] - [discrete] [[retrievers-examples-explain-multiple-rrf]] ==== Example: Explainability with multiple retrievers @@ -802,3 +632,175 @@ The output of which, albeit a bit verbose, will provide all the necessary info t // TESTRESPONSE[s/\.\.\./$body.hits.hits.0._explanation.details.1.details.0.details.0.details.0.details.0.details.0/] // TESTRESPONSE[s/\*\*\*/$body.hits.hits.0._explanation.details.1.details.0.details.0.details.0.details.1.details.0/] // TESTRESPONSE[s/jnrdZFKS3abUgWVsVdj2Vg/$body.hits.hits.0._node/] + +[discrete] +[[retrievers-examples-text-similarity-reranker-on-top-of-rrf]] +==== Example: Rerank results of an RRF retriever + +To demonstrate the full functionality of retrievers, the following examples also require access to a <> set up using the <>. + +Let's setup a reranking service and use it through the `text_similarity_reranker` retriever to rerank our top results. + +[source,console] +---- +PUT _inference/rerank/my-rerank-model +{ + "service": "cohere", + "service_settings": { + "model_id": "rerank-english-v3.0", + "api_key": "{{COHERE_API_KEY}}" + } +} +---- +//NOTCONSOLE + +Let's start by reranking the results of the `rrf` retriever in our previous example. + +[source,console] +---- +GET retrievers_example/_search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "rrf": { + "retrievers": [ + { + "standard":{ + "query":{ + "query_string":{ + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "What are the state of the art applications of AI in information retrieval?" + } + }, + "_source": false +} + +---- +// NOTCONSOLE + +[discrete] +[[retrievers-examples-rrf-ranking-on-text-similarity-reranker-results]] +==== Example: RRF with semantic reranker + +For this example, we'll replace the rrf's `standard` retriever with the `text_similarity_reranker` retriever, using the +`my-rerank-model` reranker we previously configured. Since this is a reranker, it needs an initial pool of +documents to work with. 
In this case, we'll rerank the top `rank_window_size` documents matching the `ai` topic. + +[source,js] +---- +GET /retrievers_example/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + }, + { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "term": { + "topic": "ai" + } + } + } + }, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "Can I use generative AI to identify user intent and improve search relevance?" + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "_source": false +} +---- +// NOTCONSOLE + +[discrete] +[[retrievers-examples-chaining-text-similarity-reranker-retrievers]] +==== Example: Chaining multiple semantic rerankers + +Full composability means we can chain together multiple retrievers of the same type. For instance, +imagine we have a computationally expensive reranker that's specialized for AI content. We can rerank the results of a `text_similarity_reranker` using another `text_similarity_reranker` retriever. Each reranker can operate on different fields and/or use different inference services. + +[source,js] +---- +GET retrievers_example/_search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "text_similarity_reranker": { + "retriever": { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + }, + "rank_window_size": 100, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "What are the state of the art applications of AI in information retrieval?" + } + }, + "rank_window_size": 10, + "field": "text", + "inference_id": "my-other-more-expensive-rerank-model", + "inference_text": "Applications of Large Language Models in technology and their impact on user satisfaction" + } + }, + "_source": false +} +---- +// NOTCONSOLE + +Note that our example applies two reranking steps. First, we rerank the top 100 +documents from the `knn` search using the `my-rerank-model` reranker. Then we +pick the top 10 results and rerank them using the more fine-grained +`my-other-more-expensive-rerank-model`. From 18a5cf6f730660492d6b4aac8a216f7ee7b72f20 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Thu, 14 Nov 2024 13:10:39 +0200 Subject: [PATCH 06/15] marking snippet as js --- .../search/search-your-data/retrievers-examples.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index 23d898bee3fb8..ac2386be2a33d 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -641,7 +641,7 @@ To demonstrate the full functionality of retrievers, the following examples also Let's setup a reranking service and use it through the `text_similarity_reranker` retriever to rerank our top results. -[source,console] +[source,js] ---- PUT _inference/rerank/my-rerank-model { @@ -656,7 +656,7 @@ PUT _inference/rerank/my-rerank-model Let's start by reranking the results of the `rrf` retriever in our previous example. 
-[source,console] +[source,js] ---- GET retrievers_example/_search { From 17ef862a813ed3e3d4c05e40757c9fde5c50a57c Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Thu, 14 Nov 2024 13:13:11 +0200 Subject: [PATCH 07/15] marking snippet as test-skip --- .../retrievers-examples.asciidoc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index ac2386be2a33d..e06fd8ab15aec 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -641,7 +641,7 @@ To demonstrate the full functionality of retrievers, the following examples also Let's setup a reranking service and use it through the `text_similarity_reranker` retriever to rerank our top results. -[source,js] +[source,console] ---- PUT _inference/rerank/my-rerank-model { @@ -652,11 +652,11 @@ PUT _inference/rerank/my-rerank-model } } ---- -//NOTCONSOLE +// TEST[skip: no_access_to_ml] Let's start by reranking the results of the `rrf` retriever in our previous example. -[source,js] +[source,console] ---- GET retrievers_example/_search { @@ -701,7 +701,7 @@ GET retrievers_example/_search } ---- -// NOTCONSOLE +// TEST[skip: no_access_to_ml] [discrete] [[retrievers-examples-rrf-ranking-on-text-similarity-reranker-results]] @@ -711,7 +711,7 @@ For this example, we'll replace the rrf's `standard` retriever with the `text_si `my-rerank-model` reranker we previously configured. Since this is a reranker, it needs an initial pool of documents to work with. In this case, we'll rerank the top `rank_window_size` documents matching the `ai` topic. -[source,js] +[source,console] ---- GET /retrievers_example/_search { @@ -754,7 +754,7 @@ GET /retrievers_example/_search "_source": false } ---- -// NOTCONSOLE +// TEST[skip: no_access_to_ml] [discrete] [[retrievers-examples-chaining-text-similarity-reranker-retrievers]] @@ -763,7 +763,7 @@ GET /retrievers_example/_search Full composability means we can chain together multiple retrievers of the same type. For instance, imagine we have a computationally expensive reranker that's specialized for AI content. We can rerank the results of a `text_similarity_reranker` using another `text_similarity_reranker` retriever. Each reranker can operate on different fields and/or use different inference services. -[source,js] +[source,console] ---- GET retrievers_example/_search { @@ -798,7 +798,7 @@ GET retrievers_example/_search "_source": false } ---- -// NOTCONSOLE +// TEST[skip: no_access_to_ml] Note that our example applies two reranking steps. First, we rerank the top 100 documents from the `knn` search using the `my-rerank-model` reranker. 
Then we From 03c46f2bc3fd615c73030f276d9ce4f506d24edc Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 19 Nov 2024 17:12:04 +0200 Subject: [PATCH 08/15] iter --- .../retrievers-examples.asciidoc | 432 ++++++++++++++++++ 1 file changed, 432 insertions(+) diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index e06fd8ab15aec..aa7709aa8d7f8 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -327,6 +327,438 @@ Which would return the following response collapsed results ---- // TESTRESPONSE[s/"took": 42/"took": $body.took/] + +[discrete] +[[retrievers-examples-highlighting-retriever-results]] +==== Example: Highlighting results based on nested sub-retrievers + +Highlighting is now also available for nested sub-retrievers matches. For example, consider the same +`rrf` retriever as above, with a `knn` and `standard` retriever as its sub-retrievers. We can specify a `highlight` +section, as defined in <> documentation, and compute highlights for the top results. + +[source,console] +---- +GET /retrievers_example/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "highlight": { + "fields": { + "text": { + "fragment_size": 150, + "number_of_fragments": 3 + } + } + }, + "_source": false +} +---- +// TEST[continued] + +This would highlight the `text` field, base on the matches produced by the `standard` retriever. The highlighted snippets +would then be included in the response as usual, i.e. under each search hit. + +[source,console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 0.8333334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "1", + "_score": 0.8333334, + "highlight": { + "text": [ + "Large language models are revolutionizing information retrieval by boosting search precision, deepening contextual understanding, and reshaping user experiences" + ] + } + }, + { + "_index": "retrievers_example", + "_id": "2", + "_score": 0.8333334, + "highlight": { + "text": [ + "Artificial intelligence is transforming medicine, from advancing diagnostics and tailoring treatment plans to empowering predictive patient care for improved" + ] + } + }, + { + "_index": "retrievers_example", + "_id": "3", + "_score": 0.25 + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] + + +[discrete] +[[retrievers-examples-inner-hits-retriever-results]] +==== Example: Computing inner hits from nested sub-retrievers + +We can also define `inner_hits` to be computed on any of the sub-retrievers, and propagate those computations to the top +level compound retriever. For example, let's create a new index with a `knn` field, nested under the `nested_field` field, +and index a couple of documents. 
+ + +[source,console] +---- +PUT retrievers_example_nested +{ + "mappings": { + "properties": { + "nested_field": { + "type": "nested", + "properties": { + "paragraph_id": { + "type": "keyword" + }, + "nested_vector": { + "type": "dense_vector", + "dims": 3, + "similarity": "l2_norm", + "index": true + } + } + }, + "topic": { + "type": "keyword" + } + } + } +} + +POST /retrievers_example_nested/_doc/1 +{ + "nested_field": [ + { + "paragraph_id": "1a", + "nested_vector": [ + -1.12, + -0.59, + 0.78 + ] + }, + { + "paragraph_id": "1b", + "nested_vector": [ + -0.12, + 1.56, + 0.42 + ] + }, + { + "paragraph_id": "1c", + "nested_vector": [ + 1, + -1, + 0 + ] + } + ], + "topic": [ + "ai" + ] +} + +POST /retrievers_example_nested/_doc/2 +{ + "nested_field": [ + { + "paragraph_id": "2a", + "nested_vector": [ + 0.23, + 1.24, + 0.65 + ] + } + ], + "topic": [ + "information_retrieval" + ] +} + +POST /retrievers_example_nested/_doc/3 +{ + "topic": [ + "ai" + ] +} + +POST /retrievers_example_nested/_refresh +---- +// TEST[continued] + +[source,console] +---- +GET /retrievers_example_nested/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "nested": { + "path": "nested_field", + "inner_hits": { + "name": "nested_vector", + "_source": false, + "fields": [ + "nested_field.paragraph_id" + ] + }, + "query": { + "knn": { + "field": "nested_field.nested_vector", + "query_vector": [ + 1, + 0, + 0.5 + ], + "k": 10 + } + } + } + } + } + }, + { + "standard": { + "query": { + "term": { + "topic": "ai" + } + } + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "_source": [ + "topic" + ] +} +---- +// TEST[continued] + +This would propagate the `inner_hits` defined for the `knn` query to the `rrf` retriever, and compute inner hits for `rrf`'s top results. +The response would look like the following: + +Note:: if using more than one `inner_hits` we currently need to provide custom names for each `inner_hits` so that they +are unique across all retrievers within the request. 
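For instance, if we combined two nested sub-retrievers that each request their own inner hits, we would give each `inner_hits` section its own name. The following is a minimal sketch of that pattern; the `first_knn_hits` and `second_knn_hits` names and the second query vector are hypothetical, chosen only to keep the two sections unique:

[source,js]
----
GET /retrievers_example_nested/_search
{
  "retriever": {
    "rrf": {
      "retrievers": [
        {
          "standard": {
            "query": {
              "nested": {
                "path": "nested_field",
                "inner_hits": {
                  "name": "first_knn_hits",
                  "_source": false
                },
                "query": {
                  "knn": {
                    "field": "nested_field.nested_vector",
                    "query_vector": [1, 0, 0.5],
                    "k": 10
                  }
                }
              }
            }
          }
        },
        {
          "standard": {
            "query": {
              "nested": {
                "path": "nested_field",
                "inner_hits": {
                  "name": "second_knn_hits",
                  "_source": false
                },
                "query": {
                  "knn": {
                    "field": "nested_field.nested_vector",
                    "query_vector": [0, 1, 0.5],
                    "k": 10
                  }
                }
              }
            }
          }
        }
      ],
      "rank_window_size": 10,
      "rank_constant": 1
    }
  },
  "_source": false
}
----
// NOTCONSOLE

The response to our original, single `inner_hits` request follows.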
+ +[source,console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 1.0, + "hits": [ + { + "_index": "retrievers_example_nested", + "_id": "1", + "_score": 1.0, + "_source": { + "topic": [ + "ai" + ] + }, + "inner_hits": { + "nested_vector": { + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 0.44353113, + "hits": [ + { + "_index": "retrievers_example_nested", + "_id": "1", + "_nested": { + "field": "nested_field", + "offset": 2 + }, + "_score": 0.44353113, + "fields": { + "nested_field": [ + { + "paragraph_id": [ + "1c" + ] + } + ] + } + }, + { + "_index": "retrievers_example_nested", + "_id": "1", + "_nested": { + "field": "nested_field", + "offset": 1 + }, + "_score": 0.26567122, + "fields": { + "nested_field": [ + { + "paragraph_id": [ + "1b" + ] + } + ] + } + }, + { + "_index": "retrievers_example_nested", + "_id": "1", + "_nested": { + "field": "nested_field", + "offset": 0 + }, + "_score": 0.18478848, + "fields": { + "nested_field": [ + { + "paragraph_id": [ + "1a" + ] + } + ] + } + } + ] + } + } + } + }, + { + "_index": "retrievers_example_nested", + "_id": "2", + "_score": 0.33333334, + "_source": { + "topic": [ + "information_retrieval" + ] + }, + "inner_hits": { + "nested_vector": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 0.32002488, + "hits": [ + { + "_index": "retrievers_example_nested", + "_id": "2", + "_nested": { + "field": "nested_field", + "offset": 0 + }, + "_score": 0.32002488, + "fields": { + "nested_field": [ + { + "paragraph_id": [ + "2a" + ] + } + ] + } + } + ] + } + } + } + }, + { + "_index": "retrievers_example_nested", + "_id": "3", + "_score": 0.33333334, + "_source": { + "topic": [ + "ai" + ] + }, + "inner_hits": { + "nested_vector": { + "hits": { + "total": { + "value": 0, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] + + [discrete] [[retrievers-examples-rrf-and-aggregations]] ==== Example: Combine RRF with aggregations From 4fc5874bb6ff3afa7a1df7134557a0abb7a89d75 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 19 Nov 2024 19:15:00 +0200 Subject: [PATCH 09/15] iter --- docs/reference/search/rrf.asciidoc | 88 ++++++++++++++++++- .../retrievers-examples.asciidoc | 13 +-- 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc index edd3b67e3de04..dc74f6a191b32 100644 --- a/docs/reference/search/rrf.asciidoc +++ b/docs/reference/search/rrf.asciidoc @@ -105,7 +105,7 @@ The `rrf` retriever does not currently support: * <> Using unsupported features as part of a search with an `rrf` retriever results in an exception. -+ + IMPORTANT: It is best to avoid providing a <> as part of the request, as RRF creates one internally that is shared by all sub-retrievers to ensure consistent results. @@ -703,3 +703,89 @@ So for the same params as above, we would now have: * `from=0, size=2` would return [`1`, `5`] with ranks `[1, 2]` * `from=2, size=2` would return an empty result set as it would fall outside the available `rank_window_size` results. + +==== Aggregations in RRF + +Using the `rrf` retriever, we can also gather aggregations from all its specified sub-retrievers. The aggregations gathered +are irrespective of the specified `rank_window_size`, i.e. 
we collect all matching documents and just the top `rank_window_size`, +and refer to the union of the result sets from all sub-retrievers. + +So for example, assuming that we have the following documents: +[source,python] +---- +# doc | termA | termB | +_id: 1 = foo +_id: 2 = foo bar +_id: 3 = aardvark bar +_id: 4 = foo bar +---- +// NOTCONSOLE + +And the following `rrf` query with a term aggregation specified on field `termA`: +[source,js] +---- +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "term": { + "termB": "bar" + } + } + } + }, + { + "standard": { + "query": { + "match_all": { } + } + } + } + ], + "rank_window_size": 1 + } + }, + "size": 1, + "aggs": { + "termA_agg": { + "terms": { + "field": "termA" + } + } + } +} +---- +// NOTCONSOLE + +Even though we have a `rank_window_size: 1`, the aggregations will be computed against **all** matching documents from the nested sub-retrievers. +So, the expected aggs would be: +[source, js] +---- +{ + "foo": 3, + "aardvark": 1 +} + +---- +// NOTCONSOLE + +==== Highlighting in RRF + +Similarly to above, we can also add <> to the `rrf` retriever's results. Highlighted snippets are computed based +on the matching text queries defined on the sub-retrievers. + +NOTE:: Highlighting on vector fields, using either the `knn` retriever or a `knn` query, is not supported at the moment. + +A more specific example of highlighting in RRF can also be found in the <> page. + +==== Inner hits in RRF + +Computing <> is now also an option for RRF. We can specify inner hits as part of a nested sub-retriever, which +will be propagated to the top level parent retriever. Actual inner hit computation will take place only at end of `rrf` retriever's evaluation on the top matching documents, +and not as part of the query execution of the nested sub-retrievers. + +IMPORTANT:: If we have more than one `inner_hit` sections defined across all sub-retrievers for RRF, then we have +to provide a custom name for each `inner_hit` that would be unique across all sub-retrievers for the search request. diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index aa7709aa8d7f8..1180960b5ea43 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -181,8 +181,8 @@ Which would return the following response based on the final rrf score for each ==== Example: Grouping results by year with `collapse` In our result set, we have many documents with the same `year` value. We can clean this -up using the `collapse` parameter with our retriever. This enables grouping results by -any field and returns only the highest-scoring document from each group. In this example +up using the `collapse` parameter with our retriever. This, as with the standard <> feature, +enables grouping results by any field and returns only the highest-scoring document from each group. In this example we'll collapse our results based on the `year` field. [source,console] @@ -383,7 +383,7 @@ GET /retrievers_example/_search ---- // TEST[continued] -This would highlight the `text` field, base on the matches produced by the `standard` retriever. The highlighted snippets +This would highlight the `text` field, based on the matches produced by the `standard` retriever. The highlighted snippets would then be included in the response as usual, i.e. under each search hit. 
[source,console-result] @@ -533,6 +533,9 @@ POST /retrievers_example_nested/_refresh ---- // TEST[continued] +Then, let's run an `rrf` retriever query, where we also want to compute <> for the `nested_field.nested_vector` +field, based on the `knn` query specified. + [source,console] ---- GET /retrievers_example_nested/_search @@ -591,8 +594,6 @@ GET /retrievers_example_nested/_search This would propagate the `inner_hits` defined for the `knn` query to the `rrf` retriever, and compute inner hits for `rrf`'s top results. The response would look like the following: -Note:: if using more than one `inner_hits` we currently need to provide custom names for each `inner_hits` so that they -are unique across all retrievers within the request. [source,console-result] ---- @@ -758,6 +759,8 @@ are unique across all retrievers within the request. ---- // TESTRESPONSE[s/"took": 42/"took": $body.took/] +Note:: if using more than one `inner_hits` we currently need to provide custom names for each `inner_hits` so that they +are unique across all retrievers within the request. [discrete] [[retrievers-examples-rrf-and-aggregations]] From 9cfb5f450d7eeaf4fd87e91509898305d640219b Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 19 Nov 2024 19:45:21 +0200 Subject: [PATCH 10/15] iter --- docs/reference/search/rrf.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc index dc74f6a191b32..5c1d5815e793c 100644 --- a/docs/reference/search/rrf.asciidoc +++ b/docs/reference/search/rrf.asciidoc @@ -707,8 +707,8 @@ So for the same params as above, we would now have: ==== Aggregations in RRF Using the `rrf` retriever, we can also gather aggregations from all its specified sub-retrievers. The aggregations gathered -are irrespective of the specified `rank_window_size`, i.e. we collect all matching documents and just the top `rank_window_size`, -and refer to the union of the result sets from all sub-retrievers. +are irrespective of the specified `rank_window_size` but instead we refer to the union of the result sets from all sub-retrievers, +i.e. we collect all matching documents and not just the top `rank_window_size`. So for example, assuming that we have the following documents: [source,python] From 22f45c1b2cb50ae40b487c4d9b421dda9288249f Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Wed, 27 Nov 2024 17:01:47 +0200 Subject: [PATCH 11/15] Apply suggestions from code review Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- docs/reference/search/rrf.asciidoc | 50 +++++++++++-------- .../retrievers-examples.asciidoc | 5 +- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc index 5c1d5815e793c..a942c0162a80a 100644 --- a/docs/reference/search/rrf.asciidoc +++ b/docs/reference/search/rrf.asciidoc @@ -706,22 +706,25 @@ So for the same params as above, we would now have: ==== Aggregations in RRF -Using the `rrf` retriever, we can also gather aggregations from all its specified sub-retrievers. The aggregations gathered -are irrespective of the specified `rank_window_size` but instead we refer to the union of the result sets from all sub-retrievers, -i.e. we collect all matching documents and not just the top `rank_window_size`. +The `rrf` retriever supports aggregations from all specified sub-retrievers. 
Important notes about aggregations: -So for example, assuming that we have the following documents: -[source,python] +* They operate on the complete result set from all sub-retrievers +* They are not limited by the `rank_window_size` parameter +* They process the union of all matching documents + +For example, consider the following document set: +[source,js] ---- -# doc | termA | termB | -_id: 1 = foo -_id: 2 = foo bar -_id: 3 = aardvark bar -_id: 4 = foo bar +{ + "_id": 1, "termA": "foo", + "_id": 2, "termA": "foo", "termB": "bar", + "_id": 3, "termA": "aardvark", "termB": "bar", + "_id": 4, "termA": "foo", "termB": "bar" +} ---- // NOTCONSOLE -And the following `rrf` query with a term aggregation specified on field `termA`: +Perform a term aggregation on the `termA` field using an `rrf` retriever: [source,js] ---- { @@ -760,8 +763,7 @@ And the following `rrf` query with a term aggregation specified on field `termA` ---- // NOTCONSOLE -Even though we have a `rank_window_size: 1`, the aggregations will be computed against **all** matching documents from the nested sub-retrievers. -So, the expected aggs would be: +The aggregation results will include *all* matching documents, regardless of `rank_window_size`. [source, js] ---- { @@ -774,18 +776,26 @@ So, the expected aggs would be: ==== Highlighting in RRF -Similarly to above, we can also add <> to the `rrf` retriever's results. Highlighted snippets are computed based +Using the `rrf` retriever, you can add <> to show relevant text snippets in your search results. Highlighted snippets are computed based on the matching text queries defined on the sub-retrievers. -NOTE:: Highlighting on vector fields, using either the `knn` retriever or a `knn` query, is not supported at the moment. +IMPORTANT: Highlighting on vector fields, using either the `knn` retriever or a `knn` query, is not supported. A more specific example of highlighting in RRF can also be found in the <> page. ==== Inner hits in RRF -Computing <> is now also an option for RRF. We can specify inner hits as part of a nested sub-retriever, which -will be propagated to the top level parent retriever. Actual inner hit computation will take place only at end of `rrf` retriever's evaluation on the top matching documents, -and not as part of the query execution of the nested sub-retrievers. +The `rrf` retriever supports <> functionality, allowing you to retrieve +related nested or parent/child documents alongside your main search results. Inner hits can be +specified as part of any nested sub-retriever and will be propagated to the top-level parent +retriever. Note that the inner hit computation will take place only at end of `rrf` retriever's +evaluation on the top matching documents, and not as part of the query execution of the nested +sub-retrievers. + +[IMPORTANT] +==== +When defining multiple `inner_hits` sections across sub-retrievers: -IMPORTANT:: If we have more than one `inner_hit` sections defined across all sub-retrievers for RRF, then we have -to provide a custom name for each `inner_hit` that would be unique across all sub-retrievers for the search request. 
+* Each `inner_hits` section must have a unique name +* Names must be unique across all sub-retrievers in the search request +==== diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index 1180960b5ea43..21a8ac51643a0 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -1,7 +1,6 @@ [[retrievers-examples]] -Let's work through some hands-on examples to show the different usages and functionalities that -retrievers now offer. +Learn how to combine different retrievers in these hands-on examples. === Retrievers examples @@ -81,7 +80,7 @@ POST /retrievers_example/_refresh ---- // TESTSETUP -Now that we have our documents in place, let's try to run some queries using retrievers. +Now that we have our documents in place, let's try to run some queries using retrievers. [discrete] [[retrievers-examples-combining-standard-knn-retrievers-with-rrf]] From 078d4b1037493c3f5ddf4ff2aabff3b1c6596537 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Wed, 27 Nov 2024 17:06:19 +0200 Subject: [PATCH 12/15] wrapping all responses in collapsibles --- .../retrievers-examples.asciidoc | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index 21a8ac51643a0..7a707197bdadb 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -136,6 +136,9 @@ GET /retrievers_example/_search Which would return the following response based on the final rrf score for each result +.Example response +[%collapsible] +============== [source,console-result] ---- { @@ -174,6 +177,7 @@ Which would return the following response based on the final rrf score for each } ---- // TESTRESPONSE[s/"took": 42/"took": $body.took/] +============== [discrete] [[retrievers-examples-collapsing-retriever-results]] @@ -232,6 +236,9 @@ GET /retrievers_example/_search Which would return the following response collapsed results +.Example response +[%collapsible] +============== [source,console-result] ---- { @@ -325,7 +332,7 @@ Which would return the following response collapsed results } ---- // TESTRESPONSE[s/"took": 42/"took": $body.took/] - +============== [discrete] [[retrievers-examples-highlighting-retriever-results]] @@ -385,6 +392,9 @@ GET /retrievers_example/_search This would highlight the `text` field, based on the matches produced by the `standard` retriever. The highlighted snippets would then be included in the response as usual, i.e. under each search hit. +.Example response +[%collapsible] +============== [source,console-result] ---- { @@ -433,7 +443,7 @@ would then be included in the response as usual, i.e. under each search hit. } ---- // TESTRESPONSE[s/"took": 42/"took": $body.took/] - +============== [discrete] [[retrievers-examples-inner-hits-retriever-results]] @@ -593,7 +603,9 @@ GET /retrievers_example_nested/_search This would propagate the `inner_hits` defined for the `knn` query to the `rrf` retriever, and compute inner hits for `rrf`'s top results. 
The response would look like the following: - +.Example response +[%collapsible] +============== [source,console-result] ---- { @@ -757,6 +769,7 @@ The response would look like the following: } ---- // TESTRESPONSE[s/"took": 42/"took": $body.took/] +============== Note:: if using more than one `inner_hits` we currently need to provide custom names for each `inner_hits` so that they are unique across all retrievers within the request. @@ -816,6 +829,9 @@ GET retrievers_example/_search ---- // TEST[continued] +.Example response +[%collapsible] +============== The output of which would look like the following: [source, console-result] ---- @@ -900,6 +916,7 @@ The output of which would look like the following: } ---- // TESTRESPONSE[s/"took": 42/"took": $body.took/] +============== [discrete] [[retrievers-examples-explain-multiple-rrf]] @@ -967,6 +984,9 @@ GET /retrievers_example/_search // TEST[continued] The output of which, albeit a bit verbose, will provide all the necessary info to assist in debugging and reason with ranking +.Example response +[%collapsible] +============== [source, console-result] ---- { @@ -1066,6 +1086,7 @@ The output of which, albeit a bit verbose, will provide all the necessary info t // TESTRESPONSE[s/\.\.\./$body.hits.hits.0._explanation.details.1.details.0.details.0.details.0.details.0.details.0/] // TESTRESPONSE[s/\*\*\*/$body.hits.hits.0._explanation.details.1.details.0.details.0.details.0.details.1.details.0/] // TESTRESPONSE[s/jnrdZFKS3abUgWVsVdj2Vg/$body.hits.hits.0._node/] +============== [discrete] [[retrievers-examples-text-similarity-reranker-on-top-of-rrf]] From f66412dac207d69669b6d05075e887e4a2a3ae28 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Fri, 29 Nov 2024 11:19:42 +0200 Subject: [PATCH 13/15] Apply suggestions from code review Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../retrievers-examples.asciidoc | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index 7a707197bdadb..43391b868776f 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -134,7 +134,7 @@ GET /retrievers_example/_search ---- // TEST -Which would return the following response based on the final rrf score for each result +This returns the following response based on the final rrf score for each result. .Example response [%collapsible] @@ -234,7 +234,7 @@ GET /retrievers_example/_search ---- // TEST[continued] -Which would return the following response collapsed results +This returns the following response with collapsed results. .Example response [%collapsible] @@ -451,7 +451,7 @@ would then be included in the response as usual, i.e. under each search hit. We can also define `inner_hits` to be computed on any of the sub-retrievers, and propagate those computations to the top level compound retriever. For example, let's create a new index with a `knn` field, nested under the `nested_field` field, -and index a couple of documents. +and index a couple of documents. 
 
 [source,console]
 ----
 PUT retrievers_example_nested
@@ -542,7 +542,7 @@ POST /retrievers_example_nested/_refresh
 ----
 // TEST[continued]
 
-Then, let's run an `rrf` retriever query, where we also want to compute <> for the `nested_field.nested_vector`
+Now we can run an `rrf` retriever query and also compute <> for the `nested_field.nested_vector`
 field, based on the `knn` query specified.
 
 [source,console]
@@ -601,7 +601,6 @@ GET /retrievers_example_nested/_search
 // TEST[continued]
 
 This would propagate the `inner_hits` defined for the `knn` query to the `rrf` retriever, and compute inner hits for `rrf`'s top results.
-The response would look like the following:
 
 .Example response
 [%collapsible]
@@ -770,7 +770,7 @@ The response would look like the following:
 // TESTRESPONSE[s/"took": 42/"took": $body.took/]
 ==============
 
-Note:: if using more than one `inner_hits` we currently need to provide custom names for each `inner_hits` so that they
+NOTE: If using more than one `inner_hits`, we need to provide custom names for each `inner_hits` so that they
 are unique across all retrievers within the request.
 
 [discrete]
@@ -832,7 +831,6 @@ GET retrievers_example/_search
 .Example response
 [%collapsible]
 ==============
-The output of which would look like the following:
 [source, console-result]
 ----
 {
@@ -921,9 +919,10 @@ The output of which would look like the following:
 [discrete]
 [[retrievers-examples-explain-multiple-rrf]]
 ==== Example: Explainability with multiple retrievers
+
 By adding `explain: true` to the request, each retriever will now provide a detailed explanation of all the steps
-and calculations that took place for the final score to be computed. Composability is fully supported as well in the context of `explain`, and
-each retriever will provide its own explanation, as we can see in the example below
+and calculations required to compute the final score. Composability is fully supported in the context of `explain`, and
+each retriever will provide its own explanation, as shown in the example below.
 
 [source,console]
 ----
@@ -983,7 +982,8 @@ GET /retrievers_example/_search
 ----
 // TEST[continued]
 
-The output of which, albeit a bit verbose, will provide all the necessary info to assist in debugging and reason with ranking
+The output, albeit a bit verbose, provides all the necessary information to assist in debugging and reasoning about ranking.
+
 .Example response
 [%collapsible]
 ==============
@@ -1094,7 +1094,7 @@ To demonstrate the full functionality of retrievers, the following examples also
 To demonstrate the full functionality of retrievers, the following examples also require access to a <> set up using the <>.
 
-Let's setup a reranking service and use it through the `text_similarity_reranker` retriever to rerank our top results.
+In this example we'll set up a reranking service and use it with the `text_similarity_reranker` retriever to rerank our top results. 
[source,console] ---- From ae306625f6b0dcb7cf54d8040d0d0ce840a3cb56 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Fri, 29 Nov 2024 11:20:07 +0200 Subject: [PATCH 14/15] Update docs/reference/search/search-your-data/retrievers-examples.asciidoc Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/search-your-data/retrievers-examples.asciidoc | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index 43391b868776f..be34c0739ad81 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -453,7 +453,6 @@ We can also define `inner_hits` to be computed on any of the sub-retrievers, and level compound retriever. For example, let's create a new index with a `knn` field, nested under the `nested_field` field, and index a couple of documents. - [source,console] ---- PUT retrievers_example_nested From 28c7fae7453c93a78abdb14173692a1838267022 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Fri, 29 Nov 2024 14:26:37 +0200 Subject: [PATCH 15/15] minor formatting changes on json requests --- .../retrievers-examples.asciidoc | 270 +++++++++--------- 1 file changed, 136 insertions(+), 134 deletions(-) diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index be34c0739ad81..ad1cc32dcee01 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -99,37 +99,37 @@ retriever. This retriever operates on top of two other retrievers: a `knn` retri ---- GET /retrievers_example/_search { - "retriever":{ - "rrf": { - "retrievers":[ - { - "standard":{ - "query":{ - "query_string":{ - "query": "(information retrieval) OR (artificial intelligence)", - "default_field": "text" - } - } - } - }, - { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - } - ], - "rank_window_size": 10, - "rank_constant": 1 - } - }, - "_source": false + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "_source": false } ---- // TEST @@ -192,43 +192,45 @@ we'll collapse our results based on the `year` field. 
---- GET /retrievers_example/_search { - "retriever":{ - "rrf": { - "retrievers":[ - { - "standard":{ - "query":{ - "query_string":{ - "query": "(information retrieval) OR (artificial intelligence)", - "default_field": "text" - } - } - } - }, - { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - } - ], - "rank_window_size": 10, - "rank_constant": 1 - } - }, - "collapse": { - "field": "year", - "inner_hits": { - "name": "topic related documents", - "_source": ["year"] - } - }, + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "collapse": { + "field": "year", + "inner_hits": { + "name": "topic related documents", + "_source": [ + "year" + ] + } + }, "_source": false } ---- @@ -1114,44 +1116,44 @@ Let's start by reranking the results of the `rrf` retriever in our previous exam ---- GET retrievers_example/_search { - "retriever": { - "text_similarity_reranker": { - "retriever": { - "rrf": { - "retrievers": [ - { - "standard":{ - "query":{ - "query_string":{ - "query": "(information retrieval) OR (artificial intelligence)", - "default_field": "text" - } - } - } - }, - { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - } - ], - "rank_window_size": 10, - "rank_constant": 1 - } - }, - "field": "text", - "inference_id": "my-rerank-model", - "inference_text": "What are the state of the art applications of AI in information retrieval?" - } - }, - "_source": false + "retriever": { + "text_similarity_reranker": { + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "What are the state of the art applications of AI in information retrieval?" + } + }, + "_source": false } ---- @@ -1221,34 +1223,34 @@ imagine we have a computationally expensive reranker that's specialized for AI c ---- GET retrievers_example/_search { - "retriever": { - "text_similarity_reranker": { - "retriever": { - "text_similarity_reranker": { - "retriever": { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - }, - "rank_window_size": 100, - "field": "text", - "inference_id": "my-rerank-model", - "inference_text": "What are the state of the art applications of AI in information retrieval?" 
- } - }, - "rank_window_size": 10, - "field": "text", - "inference_id": "my-other-more-expensive-rerank-model", - "inference_text": "Applications of Large Language Models in technology and their impact on user satisfaction" - } - }, + "retriever": { + "text_similarity_reranker": { + "retriever": { + "text_similarity_reranker": { + "retriever": { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + }, + "rank_window_size": 100, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "What are the state of the art applications of AI in information retrieval?" + } + }, + "rank_window_size": 10, + "field": "text", + "inference_id": "my-other-more-expensive-rerank-model", + "inference_text": "Applications of Large Language Models in technology and their impact on user satisfaction" + } + }, "_source": false } ----