Merge branch 'main' into add_esql_hash
idegtiarenko committed Dec 9, 2024
2 parents 8d76d16 + d411ad8 commit 9d3a44f
Showing 154 changed files with 7,596 additions and 1,064 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/117589.yaml
@@ -0,0 +1,5 @@
pr: 117589
summary: "Add Inference Unified API for chat completions for OpenAI"
area: Machine Learning
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/117657.yaml
@@ -0,0 +1,5 @@
pr: 117657
summary: Ignore cancellation exceptions
area: ES|QL
type: bug
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/118064.yaml
@@ -0,0 +1,5 @@
pr: 118064
summary: Add Highlighter for Semantic Text Fields
area: Highlighting
type: feature
issues: []
36 changes: 36 additions & 0 deletions docs/plugins/analysis-kuromoji.asciidoc
@@ -750,3 +750,39 @@ Which results in:
]
}
--------------------------------------------------

[[analysis-kuromoji-completion]]
==== `kuromoji_completion` token filter

The `kuromoji_completion` token filter adds Japanese romanized tokens to the term attributes along with the original tokens (surface forms).

[source,console]
--------------------------------------------------
GET _analyze
{
"analyzer": "kuromoji_completion",
"text": "寿司" <1>
}
--------------------------------------------------

<1> Returns `寿司`, `susi` (Kunrei-shiki) and `sushi` (Hepburn-shiki).

The `kuromoji_completion` token filter accepts the following settings:

`mode`::
+
--

The mode determines how tokens are romanized. It can be set to:

`index`::

Simple romanization. Expected to be used when indexing.

`query`::

Input Method aware romanization. Expected to be used when querying.

Defaults to `index`.
--
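
For example, the filter can be combined with the `kuromoji_tokenizer` in a custom analyzer. The following sketch is illustrative only; the index, analyzer, and filter names are placeholders, and only the `kuromoji_completion` type and its `mode` setting come from the documentation above:

[source,console]
--------------------------------------------------
PUT kuromoji_completion_sample
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_completion_analyzer": {
          "type": "custom",
          "tokenizer": "kuromoji_tokenizer",
          "filter": ["my_completion_filter"]
        }
      },
      "filter": {
        "my_completion_filter": {
          "type": "kuromoji_completion",
          "mode": "query" <1>
        }
      }
    }
  }
}
--------------------------------------------------

<1> Use `query` mode for search-time analyzers and `index` (the default) for index-time analyzers.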
55 changes: 24 additions & 31 deletions docs/reference/mapping/types/semantic-text.asciidoc
@@ -112,50 +112,43 @@ Trying to <<delete-inference-api,delete an {infer} endpoint>> that is used on a
{infer-cap} endpoints have a limit on the amount of text they can process.
To allow for large amounts of text to be used in semantic search, `semantic_text` automatically generates smaller passages if needed, called _chunks_.

-Each chunk will include the text subpassage and the corresponding embedding generated from it.
+Each chunk refers to a passage of the text and the corresponding embedding generated from it.
When querying, the individual passages will be automatically searched for each document, and the most relevant passage will be used to compute a score.

For more details on chunking and how to configure chunking settings, see <<infer-chunking-config, Configuring chunking>> in the Inference API documentation.

+Refer to <<semantic-search-semantic-text,this tutorial>> to learn more about
+semantic search using `semantic_text` and the `semantic` query.

[discrete]
-[[semantic-text-structure]]
-==== `semantic_text` structure
+[[semantic-text-highlighting]]
+==== Extracting Relevant Fragments from Semantic Text

-Once a document is ingested, a `semantic_text` field will have the following structure:
+You can extract the most relevant fragments from a semantic text field by using the <<highlighting,highlight parameter>> in the <<search-search-api-request-body,Search API>>.

-[source,console-result]
+[source,console]
------------------------------------------------------------
-"inference_field": {
-  "text": "these are not the droids you're looking for", <1>
-  "inference": {
-    "inference_id": "my-elser-endpoint", <2>
-    "model_settings": { <3>
-      "task_type": "sparse_embedding"
-    },
-    "chunks": [ <4>
-      {
-        "text": "these are not the droids you're looking for",
-        "embeddings": {
-          (...)
-        }
-      }
-    ]
-  }
-}
+POST test-index/_search
+{
+  "query": {
+    "semantic": {
+      "field": "my_semantic_field"
+    }
+  },
+  "highlight": {
+    "fields": {
+      "my_semantic_field": {
+        "type": "semantic",
+        "number_of_fragments": 2, <1>
+        "order": "score" <2>
+      }
+    }
+  }
+}
------------------------------------------------------------
-// TEST[skip:TBD]
-<1> The field will become an object structure to accommodate both the original
-text and the inference results.
-<2> The `inference_id` used to generate the embeddings.
-<3> Model settings, including the task type and dimensions/similarity if
-applicable.
-<4> Inference results will be grouped in chunks, each with its corresponding
-text and embeddings.
-
-Refer to <<semantic-search-semantic-text,this tutorial>> to learn more about
-semantic search using `semantic_text` and the `semantic` query.
+// TEST[skip:Requires inference endpoint]
+<1> Specifies the maximum number of fragments to return.
+<2> Sorts highlighted fragments by score when set to `score`. By default, fragments are returned in the order in which they appear in the field (`order: none`).

[discrete]
[[custom-indexing]]
@@ -19,6 +19,7 @@
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.rest.ESRestTestCase;
+import org.elasticsearch.test.rest.ObjectPath;

import java.io.Closeable;
import java.io.IOException;
@@ -27,7 +28,6 @@
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;

-import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;

@@ -152,10 +152,9 @@ private void testNonexistentBucket(Boolean readonly) throws Exception {

final var responseException = expectThrows(ResponseException.class, () -> client().performRequest(registerRequest));
assertEquals(RestStatus.INTERNAL_SERVER_ERROR.getStatus(), responseException.getResponse().getStatusLine().getStatusCode());
-assertThat(
-    responseException.getMessage(),
-    allOf(containsString("repository_verification_exception"), containsString("is not accessible on master node"))
-);
+final var responseObjectPath = ObjectPath.createFromResponse(responseException.getResponse());
+assertThat(responseObjectPath.evaluate("error.type"), equalTo("repository_verification_exception"));
+assertThat(responseObjectPath.evaluate("error.reason"), containsString("is not accessible on master node"));
}

public void testNonexistentClient() throws Exception {
@@ -181,15 +180,11 @@ private void testNonexistentClient(Boolean readonly) throws Exception {

final var responseException = expectThrows(ResponseException.class, () -> client().performRequest(registerRequest));
assertEquals(RestStatus.INTERNAL_SERVER_ERROR.getStatus(), responseException.getResponse().getStatusLine().getStatusCode());
-assertThat(
-    responseException.getMessage(),
-    allOf(
-        containsString("repository_verification_exception"),
-        containsString("is not accessible on master node"),
-        containsString("illegal_argument_exception"),
-        containsString("Unknown s3 client name")
-    )
-);
+final var responseObjectPath = ObjectPath.createFromResponse(responseException.getResponse());
+assertThat(responseObjectPath.evaluate("error.type"), equalTo("repository_verification_exception"));
+assertThat(responseObjectPath.evaluate("error.reason"), containsString("is not accessible on master node"));
+assertThat(responseObjectPath.evaluate("error.caused_by.type"), equalTo("illegal_argument_exception"));
+assertThat(responseObjectPath.evaluate("error.caused_by.reason"), containsString("Unknown s3 client name"));
}

public void testNonexistentSnapshot() throws Exception {
@@ -212,21 +207,24 @@ private void testNonexistentSnapshot(Boolean readonly) throws Exception {
final var getSnapshotRequest = new Request("GET", "/_snapshot/" + repositoryName + "/" + randomIdentifier());
final var getSnapshotException = expectThrows(ResponseException.class, () -> client().performRequest(getSnapshotRequest));
assertEquals(RestStatus.NOT_FOUND.getStatus(), getSnapshotException.getResponse().getStatusLine().getStatusCode());
-assertThat(getSnapshotException.getMessage(), containsString("snapshot_missing_exception"));
+final var getResponseObjectPath = ObjectPath.createFromResponse(getSnapshotException.getResponse());
+assertThat(getResponseObjectPath.evaluate("error.type"), equalTo("snapshot_missing_exception"));

final var restoreRequest = new Request("POST", "/_snapshot/" + repositoryName + "/" + randomIdentifier() + "/_restore");
if (randomBoolean()) {
restoreRequest.addParameter("wait_for_completion", Boolean.toString(randomBoolean()));
}
final var restoreException = expectThrows(ResponseException.class, () -> client().performRequest(restoreRequest));
assertEquals(RestStatus.INTERNAL_SERVER_ERROR.getStatus(), restoreException.getResponse().getStatusLine().getStatusCode());
-assertThat(restoreException.getMessage(), containsString("snapshot_restore_exception"));
+final var restoreResponseObjectPath = ObjectPath.createFromResponse(restoreException.getResponse());
+assertThat(restoreResponseObjectPath.evaluate("error.type"), equalTo("snapshot_restore_exception"));

if (readonly != Boolean.TRUE) {
final var deleteRequest = new Request("DELETE", "/_snapshot/" + repositoryName + "/" + randomIdentifier());
final var deleteException = expectThrows(ResponseException.class, () -> client().performRequest(deleteRequest));
assertEquals(RestStatus.NOT_FOUND.getStatus(), deleteException.getResponse().getStatusLine().getStatusCode());
-assertThat(deleteException.getMessage(), containsString("snapshot_missing_exception"));
+final var deleteResponseObjectPath = ObjectPath.createFromResponse(deleteException.getResponse());
+assertThat(deleteResponseObjectPath.evaluate("error.type"), equalTo("snapshot_missing_exception"));
}
}
}
46 changes: 40 additions & 6 deletions muted-tests.yml
@@ -117,9 +117,6 @@ tests:
- class: org.elasticsearch.xpack.deprecation.DeprecationHttpIT
method: testDeprecatedSettingsReturnWarnings
issue: https://github.com/elastic/elasticsearch/issues/108628
-- class: org.elasticsearch.action.search.SearchQueryThenFetchAsyncActionTests
-  method: testBottomFieldSort
-  issue: https://github.com/elastic/elasticsearch/issues/116249
- class: org.elasticsearch.xpack.shutdown.NodeShutdownIT
method: testAllocationPreventedForRemoval
issue: https://github.com/elastic/elasticsearch/issues/116363
@@ -242,12 +239,12 @@ tests:
- class: org.elasticsearch.packaging.test.ConfigurationTests
method: test30SymlinkedDataPath
issue: https://github.com/elastic/elasticsearch/issues/118111
-- class: org.elasticsearch.datastreams.ResolveClusterDataStreamIT
-  method: testClusterResolveWithDataStreamsUsingAlias
-  issue: https://github.com/elastic/elasticsearch/issues/118124
- class: org.elasticsearch.packaging.test.KeystoreManagementTests
method: test30KeystorePasswordFromFile
issue: https://github.com/elastic/elasticsearch/issues/118123
+- class: org.elasticsearch.packaging.test.KeystoreManagementTests
+  method: test31WrongKeystorePasswordFromFile
+  issue: https://github.com/elastic/elasticsearch/issues/118123
- class: org.elasticsearch.packaging.test.ArchiveTests
method: test41AutoconfigurationNotTriggeredWhenNodeCannotContainData
issue: https://github.com/elastic/elasticsearch/issues/118110
@@ -260,6 +257,43 @@ tests:
- class: org.elasticsearch.xpack.remotecluster.CrossClusterEsqlRCS2UnavailableRemotesIT
method: testEsqlRcs2UnavailableRemoteScenarios
issue: https://github.com/elastic/elasticsearch/issues/117419
- class: org.elasticsearch.packaging.test.DebPreservationTests
method: test40RestartOnUpgrade
issue: https://github.com/elastic/elasticsearch/issues/118170
- class: org.elasticsearch.xpack.inference.DefaultEndPointsIT
method: testInferDeploysDefaultRerank
issue: https://github.com/elastic/elasticsearch/issues/118184
- class: org.elasticsearch.xpack.esql.action.EsqlActionTaskIT
method: testCancelRequestWhenFailingFetchingPages
issue: https://github.com/elastic/elasticsearch/issues/118193
- class: org.elasticsearch.packaging.test.MemoryLockingTests
method: test20MemoryLockingEnabled
issue: https://github.com/elastic/elasticsearch/issues/118195
- class: org.elasticsearch.packaging.test.ArchiveTests
method: test42AutoconfigurationNotTriggeredWhenNodeCannotBecomeMaster
issue: https://github.com/elastic/elasticsearch/issues/118196
- class: org.elasticsearch.packaging.test.ArchiveTests
method: test43AutoconfigurationNotTriggeredWhenTlsAlreadyConfigured
issue: https://github.com/elastic/elasticsearch/issues/118202
- class: org.elasticsearch.packaging.test.ArchiveTests
method: test44AutoConfigurationNotTriggeredOnNotWriteableConfDir
issue: https://github.com/elastic/elasticsearch/issues/118208
- class: org.elasticsearch.packaging.test.ArchiveTests
method: test51AutoConfigurationWithPasswordProtectedKeystore
issue: https://github.com/elastic/elasticsearch/issues/118212
- class: org.elasticsearch.xpack.inference.InferenceCrudIT
method: testUnifiedCompletionInference
issue: https://github.com/elastic/elasticsearch/issues/118210
- class: org.elasticsearch.ingest.common.IngestCommonClientYamlTestSuiteIT
issue: https://github.com/elastic/elasticsearch/issues/118215
- class: org.elasticsearch.datastreams.DataStreamsClientYamlTestSuiteIT
method: test {p0=data_stream/120_data_streams_stats/Multiple data stream}
issue: https://github.com/elastic/elasticsearch/issues/118217
- class: org.elasticsearch.xpack.security.operator.OperatorPrivilegesIT
method: testEveryActionIsEitherOperatorOnlyOrNonOperator
issue: https://github.com/elastic/elasticsearch/issues/118220
- class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT
issue: https://github.com/elastic/elasticsearch/issues/118224

# Examples:
#
@@ -0,0 +1,29 @@
{
"migrate.reindex":{
"documentation":{
"url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/data-stream-reindex.html",
"description":"This API reindexes all legacy backing indices for a data stream. It does this in a persistent task. The persistent task id is returned immediately, and the reindexing work is completed in that task"
},
"stability":"experimental",
"visibility":"private",
"headers":{
"accept": [ "application/json"],
"content_type": ["application/json"]
},
"url":{
"paths":[
{
"path":"/_migration/reindex",
"methods":[
"POST"
]
}
]
},
"body":{
"description":"The body contains the fields `mode` and `source.index, where the only mode currently supported is `upgrade`, and the `source.index` must be a data stream name",
"required":true
}
}
}
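
Based solely on the body description in this spec (the endpoint is marked experimental and private, and `my-data-stream` is a placeholder name), a conforming request would presumably look like:

[source,console]
--------------------------------------------------
POST /_migration/reindex
{
  "mode": "upgrade",
  "source": {
    "index": "my-data-stream"
  }
}
--------------------------------------------------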

@@ -12,6 +12,7 @@
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.xcontent.ToXContent;

import java.util.Collections;
import java.util.Iterator;

public enum ChunkedToXContentHelper {
@@ -53,6 +54,14 @@ public static Iterator<ToXContent> field(String name, String value) {
return Iterators.single(((builder, params) -> builder.field(name, value)));
}

public static Iterator<ToXContent> optionalField(String name, String value) {
if (value == null) {
return Collections.emptyIterator();
} else {
return field(name, value);
}
}

/**
* Creates an Iterator of a single ToXContent object that serializes the given object as a single chunk. Just wraps {@link
* Iterators#single}, but still useful because it avoids any type ambiguity.
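
A minimal sketch of how the new `optionalField` helper might be used from a `ChunkedToXContent` implementation. The `name` and `description` fields and the enclosing method are invented for illustration; only the helper calls come from this diff:

[source,java]
--------------------------------------------------
import java.util.Iterator;

import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.xcontent.ChunkedToXContentHelper;
import org.elasticsearch.xcontent.ToXContent;

// Serializes { "name": ..., "description": ... } chunk by chunk, emitting
// "description" only when it is non-null (optionalField returns an empty
// iterator for null values, so no per-field null check is needed here).
public Iterator<ToXContent> toXContentChunked(ToXContent.Params params) {
    return Iterators.concat(
        ChunkedToXContentHelper.startObject(),
        ChunkedToXContentHelper.field("name", name),
        ChunkedToXContentHelper.optionalField("description", description),
        ChunkedToXContentHelper.endObject()
    );
}
--------------------------------------------------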
@@ -112,6 +112,23 @@ void infer(
);

/**
* Perform completion inference on the model using the unified schema.
*
* @param model The model
* @param request Parameters for the request
* @param timeout The timeout for the request
* @param listener Inference result listener
*/
void unifiedCompletionInfer(
Model model,
UnifiedCompletionRequest request,
TimeValue timeout,
ActionListener<InferenceServiceResults> listener
);

/**
* Chunk long text.
*
* @param model The model
* @param query Inference query, mainly for re-ranking
* @param input Inference input
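
A hedged sketch of a call site for the new method; the `service`, `model`, and `request` variables are assumed to already exist, and the two handler methods are placeholders rather than part of this diff:

[source,java]
--------------------------------------------------
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.inference.InferenceServiceResults;

// Kick off unified completion inference; the listener is invoked
// asynchronously with either the results or the failure.
service.unifiedCompletionInfer(
    model,                          // the inference Model
    request,                        // a UnifiedCompletionRequest
    TimeValue.timeValueSeconds(30), // per-request timeout
    ActionListener.wrap(
        (InferenceServiceResults results) -> handleResults(results),
        e -> handleFailure(e)
    )
);
--------------------------------------------------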
@@ -38,6 +38,10 @@ public static TaskType fromString(String name) {
}

public static TaskType fromStringOrStatusException(String name) {
if (name == null) {
throw new ElasticsearchStatusException("Task type must not be null", RestStatus.BAD_REQUEST);
}

try {
TaskType taskType = TaskType.fromString(name);
return Objects.requireNonNull(taskType);
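
A small sketch of the behavior this guard adds (the `completion` task type name and its lower-case parsing are assumed from the rest of the enum, which is not shown in this diff):

[source,java]
--------------------------------------------------
// Valid names still resolve exactly as before.
TaskType taskType = TaskType.fromStringOrStatusException("completion");

// A null name now fails fast with an HTTP 400:
// ElasticsearchStatusException: "Task type must not be null" (BAD_REQUEST),
// rather than a lower-level error escaping from TaskType.fromString(null).
TaskType invalid = TaskType.fromStringOrStatusException(null);
--------------------------------------------------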