Merge remote-tracking branch 'upstream/main' into inference_metadata_…

…fields
jimczi · Nov 29, 2024 · 78ff84f · 78ff84f
2 parents 8f5e234 + 0b764ad
commit 78ff84f
Show file tree

Hide file tree

Showing 139 changed files with 5,893 additions and 2,776 deletions.
diff --git a/.buildkite/scripts/dra-workflow.sh b/.buildkite/scripts/dra-workflow.sh
@@ -75,7 +75,6 @@ find "$WORKSPACE" -type d -path "*/build/distributions" -exec chmod a+w {} \;
 
 echo --- Running release-manager
 
-set +e
 # Artifacts should be generated
 docker run --rm \
   --name release-manager \
@@ -92,16 +91,4 @@ docker run --rm \
   --version "$ES_VERSION" \
   --artifact-set main \
   --dependency "beats:https://artifacts-${WORKFLOW}.elastic.co/beats/${BEATS_BUILD_ID}/manifest-${ES_VERSION}${VERSION_SUFFIX}.json" \
-  --dependency "ml-cpp:https://artifacts-${WORKFLOW}.elastic.co/ml-cpp/${ML_CPP_BUILD_ID}/manifest-${ES_VERSION}${VERSION_SUFFIX}.json" \
-2>&1 | tee release-manager.log
-EXIT_CODE=$?
-set -e
-
-# This failure is just generating a ton of noise right now, so let's just ignore it
-# This should be removed once this issue has been fixed
-if grep "elasticsearch-ubi-9.0.0-SNAPSHOT-docker-image.tar.gz" release-manager.log; then
-  echo "Ignoring error about missing ubi artifact"
-  exit 0
-fi
-
-exit "$EXIT_CODE"
+  --dependency "ml-cpp:https://artifacts-${WORKFLOW}.elastic.co/ml-cpp/${ML_CPP_BUILD_ID}/manifest-${ES_VERSION}${VERSION_SUFFIX}.json"
diff --git a/...va/org/elasticsearch/gradle/internal/conventions/precommit/FormattingPrecommitPlugin.java b/...va/org/elasticsearch/gradle/internal/conventions/precommit/FormattingPrecommitPlugin.java
@@ -17,6 +17,8 @@
 import org.gradle.api.Project;
 
 import java.io.File;
+import java.util.Arrays;
+import java.util.Map;
 
 /**
  * This plugin configures formatting for Java source using Spotless
@@ -64,7 +66,8 @@ public void apply(Project project) {
                 java.importOrderFile(new File(elasticsearchWorkspace, importOrderPath));
 
                 // Most formatting is done through the Eclipse formatter
-                java.eclipse().configFile(new File(elasticsearchWorkspace, formatterConfigPath));
+                java.eclipse().withP2Mirrors(Map.of("https://download.eclipse.org/", "https://mirror.umd.edu/eclipse/"))
+                    .configFile(new File(elasticsearchWorkspace, formatterConfigPath));
 
                 // Ensure blank lines are actually empty. Since formatters are applied in
                 // order, apply this one last, otherwise non-empty blank lines can creep

diff --git a/docs/changelog/104125.yaml b/docs/changelog/104125.yaml
@@ -0,0 +1,18 @@
+pr: 104125
+summary: Disable machine learning on macOS x86_64
+area: Machine Learning
+type: breaking
+issues: []
+breaking:
+  title: Disable machine learning on macOS x86_64
+  area: Packaging
+  details: The machine learning plugin is permanently disabled on macOS x86_64.
+    For the last three years Apple has been selling hardware based on the arm64
+    architecture, and support will increasingly focus on this architecture in
+    the future. Changes to upstream dependencies of Elastic's machine learning
+    functionality have made it unviable for Elastic to continue to build machine
+    learning on macOS x86_64.
+  impact: To continue to use machine learning functionality on macOS, please switch to
+    an arm64 machine (Apple silicon). Alternatively, it will still be possible to run
+    Elasticsearch with machine learning enabled in a Docker container on macOS x86_64.
+  notable: false
diff --git a/docs/changelog/111494.yaml b/docs/changelog/111494.yaml
@@ -0,0 +1,5 @@
+pr: 111494
+summary: Extensible Completion Postings Formats
+area: "Suggesters"
+type: enhancement
+issues: []
diff --git a/docs/changelog/113120.yaml b/docs/changelog/113120.yaml
@@ -0,0 +1,5 @@
+pr: 113120
+summary: ESQL - enabling scoring with METADATA `_score`
+area: ES|QL
+type: enhancement
+issues: []
diff --git a/docs/changelog/117606.yaml b/docs/changelog/117606.yaml
@@ -0,0 +1,5 @@
+pr: 117606
+summary: Remove deprecated sort from reindex operation within dataframe analytics procedure
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/changelog/117618.yaml b/docs/changelog/117618.yaml
@@ -0,0 +1,5 @@
+pr: 117618
+summary: SearchStatesIt failures reported by CI
+area: Search
+type: bug
+issues: [116617, 116618]
diff --git a/docs/changelog/117655.yaml b/docs/changelog/117655.yaml
@@ -0,0 +1,5 @@
+pr: 117655
+summary: Add nulls support to Categorize
+area: ES|QL
+type: enhancement
+issues: []
diff --git a/docs/changelog/117750.yaml b/docs/changelog/117750.yaml
@@ -0,0 +1,6 @@
+pr: 117750
+summary: '`CrossClusterIT` `testCancel` failure'
+area: Search
+type: bug
+issues:
+ - 108061
diff --git a/docs/reference/quickstart/full-text-filtering-tutorial.asciidoc b/docs/reference/quickstart/full-text-filtering-tutorial.asciidoc
@@ -511,8 +511,9 @@ In this tutorial scenario it's useful for when users have complex requirements f
 
 Let's create a query that addresses the following user needs:
 
-* Must be a vegetarian main course
+* Must be a vegetarian recipe
 * Should contain "curry" or "spicy" in the title or description
+* Should be a main course
 * Must not be a dessert
 * Must have a rating of at least 4.5
 * Should prefer recipes published in the last month
@@ -524,16 +525,7 @@ GET /cooking_blog/_search
   "query": {
     "bool": {
       "must": [
-        {
-          "term": {
-            "category.keyword": "Main Course"
-          }
-        },
-        {
-          "term": {
-            "tags": "vegetarian"
-          }
-        },
+        { "term": { "tags": "vegetarian" } },
         {
           "range": {
             "rating": {
@@ -543,10 +535,18 @@ GET /cooking_blog/_search
         }
       ],
       "should": [
+        {
+          "term": {
+            "category": "Main Course"
+          }
+        },
         {
           "multi_match": {
             "query": "curry spicy",
-            "fields": ["title^2", "description"]
+            "fields": [
+              "title^2",
+              "description"
+            ]
           }
         },
         {
@@ -590,12 +590,12 @@ GET /cooking_blog/_search
       "value": 1,
       "relation": "eq"
     },
-    "max_score": 7.9835095,
+    "max_score": 7.444513,
     "hits": [
       {
         "_index": "cooking_blog",
         "_id": "2",
-        "_score": 7.9835095,
+        "_score": 7.444513,
         "_source": {
           "title": "Spicy Thai Green Curry: A Vegetarian Adventure", <1>
           "description": "Dive into the flavors of Thailand with this vibrant green curry. Packed with vegetables and aromatic herbs, this dish is both healthy and satisfying. Don't worry about the heat - you can easily adjust the spice level to your liking.", <2>
@@ -619,8 +619,8 @@ GET /cooking_blog/_search
 <1> The title contains "Spicy" and "Curry", matching our should condition. With the default <<type-best-fields,best_fields>> behavior, this field contributes most to the relevance score.
 <2> While the description also contains matching terms, only the best matching field's score is used by default.
 <3> The recipe was published within the last month, satisfying our recency preference.
-<4> The "Main Course" category matches our `must` condition.
-<5> The "vegetarian" tag satisfies another `must` condition, while "curry" and "spicy" tags align with our `should` preferences.
+<4> The "Main Course" category satisfies another `should` condition.
+<5> The "vegetarian" tag satisfies a `must` condition, while "curry" and "spicy" tags align with our `should` preferences.
 <6> The rating of 4.6 meets our minimum rating requirement of 4.5.
 ==============
 

diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc
@@ -105,7 +105,7 @@ The `rrf` retriever does not currently support:
 * <<rescore, rescore>>
 
 Using unsupported features as part of a search with an `rrf` retriever results in an exception.
-+
+
 IMPORTANT: It is best to avoid providing a <<search-api-pit, point in time>> as part of the request, as
 RRF creates one internally that is shared by all sub-retrievers to ensure consistent results.
 
@@ -703,3 +703,99 @@ So for the same params as above, we would now have:
 
 * `from=0, size=2` would return [`1`, `5`] with ranks `[1, 2]`
 * `from=2, size=2` would return an empty result set as it would fall outside the available `rank_window_size` results.
+
+==== Aggregations in RRF
+
+The `rrf` retriever supports aggregations from all specified sub-retrievers. Important notes about aggregations:
+
+* They operate on the complete result set from all sub-retrievers
+* They are not limited by the `rank_window_size` parameter
+* They process the union of all matching documents
+
+For example, consider the following document set:
+[source,js]
+----
+[
+    { "_id": 1, "termA": "foo" },
+    { "_id": 2, "termA": "foo", "termB": "bar" },
+    { "_id": 3, "termA": "aardvark", "termB": "bar" },
+    { "_id": 4, "termA": "foo", "termB": "bar" }
+]
+----
+// NOTCONSOLE
+
+Perform a terms aggregation on the `termA` field using an `rrf` retriever:
+[source,js]
+----
+{
+    "retriever": {
+        "rrf": {
+            "retrievers": [
+                {
+                    "standard": {
+                        "query": {
+                            "term": {
+                                "termB": "bar"
+                            }
+                        }
+                    }
+                },
+                {
+                    "standard": {
+                        "query": {
+                            "match_all": { }
+                        }
+                    }
+                }
+            ],
+            "rank_window_size": 1
+        }
+    },
+    "size": 1,
+    "aggs": {
+        "termA_agg": {
+            "terms": {
+                "field": "termA"
+            }
+        }
+    }
+}
+----
+// NOTCONSOLE
+
+The aggregation results will include *all* matching documents, regardless of `rank_window_size`.
+[source, js]
+----
+{
+    "foo": 3,
+    "aardvark": 1
+}
+
+----
+// NOTCONSOLE
+
+==== Highlighting in RRF
+
+Using the `rrf` retriever, you can add <<highlighting, highlight snippets>> to show relevant text snippets in your search results. Highlighted snippets are computed based
+on the matching text queries defined on the sub-retrievers.
+
+IMPORTANT: Highlighting on vector fields, using either the `knn` retriever or a `knn` query, is not supported.
+
+A more specific example of highlighting in RRF can also be found on the <<retrievers-examples-highlighting-retriever-results, retrievers examples>> page.
+
+==== Inner hits in RRF
+
+The `rrf` retriever supports <<inner-hits,inner hits>> functionality, allowing you to retrieve 
+related nested or parent/child documents alongside your main search results. Inner hits can be 
+specified as part of any nested sub-retriever and will be propagated to the top-level parent 
+retriever. Note that the inner hit computation will take place only at the end of the `rrf` retriever's
+evaluation on the top matching documents, and not as part of the query execution of the nested 
+sub-retrievers.
+
+[IMPORTANT]
+====
+When defining multiple `inner_hits` sections across sub-retrievers:
+
+* Each `inner_hits` section must have a unique name
+* Names must be unique across all sub-retrievers in the search request
+====