"
+
+# Process each line of the string
+while IFS=: read -r label value; do
+ if [[ -n "$label" && -n "$value" ]]; then
+ # Trim whitespace from label and value
+ trimmed_label=$(echo "$label" | xargs)
+ trimmed_value=$(echo "$value" | xargs)
+
+ # Append to HTML output variable
+ summaryHtml+="<li><strong>$trimmed_label:</strong> $trimmed_value</li>"
+ fi
+done <<< "$perfOutput"
+
+summaryHtml+="</ul>"
+
+# generate html for links
+summaryHtml+="<h4>Investigation Links</h4>"
+summaryHtml+="<ul>"
+
+# Process each line of the string
+while IFS= read -r line; do
+ if [[ "$line" =~ http.* ]]; then
+ # Extract URL and description using awk
+ url=$(echo "$line" | awk '{print $NF}')
+ description=$(echo "$line" | sed -e "s/:.*//")
+
+ # Append to HTML output variable
+ summaryHtml+="<li><a href=\"$url\">$description</a></li>"
+ fi
+done <<< "$investigationOutput"
+
+# End of the HTML content
+summaryHtml+="</ul>"
+
+cat << EOF | buildkite-agent annotate --context "ctx-validation-summary" --style "info"
+$summaryHtml
+EOF
+
+# Check if the command was successful
+if [ $retval -eq 0 ]; then
+ echo "Experiment completed successfully"
+elif [ $retval -eq 1 ]; then
+ echo "An invalid input was provided while attempting to run the experiment"
+elif [ $retval -eq 2 ]; then
+ echo "One of the builds that is part of the experiment failed"
+elif [ $retval -eq 3 ]; then
+ echo "The build was not fully cacheable for the given task graph"
+else
+ echo "An unclassified, fatal error happened while running the experiment"
+fi
+
+exit $retval
+
diff --git a/.ci/bwcVersions b/.ci/bwcVersions
index bce556e9fc352..776be80e0d291 100644
--- a/.ci/bwcVersions
+++ b/.ci/bwcVersions
@@ -31,5 +31,6 @@ BWC_VERSION:
- "8.11.4"
- "8.12.2"
- "8.13.4"
- - "8.14.2"
+ - "8.14.4"
- "8.15.0"
+ - "8.16.0"
diff --git a/.ci/snapshotBwcVersions b/.ci/snapshotBwcVersions
index 5fc4b6c072899..f5f7f7a7d4ecb 100644
--- a/.ci/snapshotBwcVersions
+++ b/.ci/snapshotBwcVersions
@@ -1,4 +1,5 @@
BWC_VERSION:
- "7.17.23"
- - "8.14.2"
+ - "8.14.4"
- "8.15.0"
+ - "8.16.0"
diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle
index 8753d4a4762b7..49e81a67e85f9 100644
--- a/benchmarks/build.gradle
+++ b/benchmarks/build.gradle
@@ -47,8 +47,8 @@ dependencies {
api "org.openjdk.jmh:jmh-core:$versions.jmh"
annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:$versions.jmh"
// Dependencies of JMH
- runtimeOnly 'net.sf.jopt-simple:jopt-simple:4.6'
- runtimeOnly 'org.apache.commons:commons-math3:3.2'
+ runtimeOnly 'net.sf.jopt-simple:jopt-simple:5.0.4'
+ runtimeOnly 'org.apache.commons:commons-math3:3.6.1'
}
// enable the JMH's BenchmarkProcessor to generate the final benchmark classes
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java
index 4bb33937579c2..2185c6d1df611 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java
@@ -41,6 +41,7 @@
import org.elasticsearch.compute.lucene.ValuesSourceReaderOperator;
import org.elasticsearch.compute.operator.topn.TopNOperator;
import org.elasticsearch.core.IOUtils;
+import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
@@ -189,6 +190,11 @@ public String indexName() {
return "benchmark";
}
+ @Override
+ public IndexSettings indexSettings() {
+ throw new UnsupportedOperationException();
+ }
+
@Override
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
return MappedFieldType.FieldExtractPreference.NONE;
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java
index 230e0c7e546c2..691874c775302 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/TermsReduceBenchmark.java
@@ -71,7 +71,7 @@ public class TermsReduceBenchmark {
private final SearchPhaseController controller = new SearchPhaseController((task, req) -> new AggregationReduceContext.Builder() {
@Override
public AggregationReduceContext forPartialReduction() {
- return new AggregationReduceContext.ForPartial(null, null, task, builder);
+ return new AggregationReduceContext.ForPartial(null, null, task, builder, b -> {});
}
@Override
diff --git a/branches.json b/branches.json
index 2794b545facc6..b852cd1fa5dbd 100644
--- a/branches.json
+++ b/branches.json
@@ -4,6 +4,9 @@
{
"branch": "main"
},
+ {
+ "branch": "8.15"
+ },
{
"branch": "8.14"
},
diff --git a/build-tools-internal/build.gradle b/build-tools-internal/build.gradle
index 84e56bbaf03ad..a8d1110ff4736 100644
--- a/build-tools-internal/build.gradle
+++ b/build-tools-internal/build.gradle
@@ -274,10 +274,7 @@ dependencies {
// ensuring brought asm version brought in by spock is up-to-date
testImplementation buildLibs.asm
integTestImplementation buildLibs.asm
- integTestImplementation('org.ow2.asm:asm:9.6')
- api("org.yaml:snakeyaml") {
- version { strictly(versions.snakeyaml) }
- }
+ api(buildLibs.snakeyaml)
}
// Forcefully downgrade the jackson platform as used in production
api enforcedPlatform(buildLibs.jackson.platform)
@@ -314,7 +311,7 @@ dependencies {
compileOnly buildLibs.checkstyle
compileOnly buildLibs.reflections
- implementation 'com.github.javaparser:javaparser-core:3.18.0'
+ implementation buildLibs.javaparser
runtimeOnly "org.elasticsearch.gradle:reaper:$version"
testImplementation buildLibs.checkstyle
diff --git a/build-tools-internal/src/main/groovy/elasticsearch.build-scan.gradle b/build-tools-internal/src/main/groovy/elasticsearch.build-scan.gradle
index c6930c2263ec3..7cba4730e88da 100644
--- a/build-tools-internal/src/main/groovy/elasticsearch.build-scan.gradle
+++ b/build-tools-internal/src/main/groovy/elasticsearch.build-scan.gradle
@@ -26,13 +26,10 @@ develocity {
if (jenkinsUrl?.host?.endsWith('elastic.co') || jenkinsUrl?.host?.endsWith('elastic.dev') || System.getenv('BUILDKITE') == 'true') {
publishing.onlyIf { true }
server = 'https://gradle-enterprise.elastic.co'
- } else {
- publishing.onlyIf {
- server.isPresent();
- }
+ } else if (server.isPresent() == false) {
+ publishing.onlyIf { false }
}
-
background {
tag OS.current().name()
tag Architecture.current().name()
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java
index 4f9498c8f33a6..b513fd7b93631 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java
@@ -8,7 +8,7 @@
package org.elasticsearch.gradle.internal;
-import com.gradle.scan.plugin.BuildScanExtension;
+import com.gradle.develocity.agent.gradle.DevelocityConfiguration;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
@@ -64,7 +64,7 @@ public void apply(Project target) {
File targetFile = target.file("build/" + buildNumber + ".tar.bz2");
File projectDir = target.getProjectDir();
File gradleWorkersDir = new File(target.getGradle().getGradleUserHomeDir(), "workers/");
- BuildScanExtension extension = target.getExtensions().getByType(BuildScanExtension.class);
+ DevelocityConfiguration extension = target.getExtensions().getByType(DevelocityConfiguration.class);
File daemonsLogDir = new File(target.getGradle().getGradleUserHomeDir(), "daemon/" + target.getGradle().getGradleVersion());
getFlowScope().always(BuildFinishedFlowAction.class, spec -> {
@@ -125,7 +125,7 @@ interface Parameters extends FlowParameters {
ListProperty<File> getFilteredFiles();
@Input
- Property<BuildScanExtension> getBuildScan();
+ Property<DevelocityConfiguration> getBuildScan();
}
@@ -198,7 +198,7 @@ public void execute(BuildFinishedFlowAction.Parameters parameters) throws FileNo
+ System.getenv("BUILDKITE_JOB_ID")
+ "/artifacts/"
+ artifactUuid;
- parameters.getBuildScan().get().link("Artifact Upload", targetLink);
+ parameters.getBuildScan().get().getBuildScan().link("Artifact Upload", targetLink);
}
} catch (Exception e) {
System.out.println("Failed to upload buildkite artifact " + e.getMessage());
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java
index d344b4694a5b5..689c8ddecb057 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java
@@ -25,6 +25,7 @@
import org.gradle.api.artifacts.Configuration;
import org.gradle.api.file.FileCollection;
import org.gradle.api.plugins.JavaPlugin;
+import org.gradle.api.provider.ProviderFactory;
import org.gradle.api.tasks.SourceSet;
import org.gradle.api.tasks.SourceSetContainer;
import org.gradle.api.tasks.testing.Test;
@@ -33,16 +34,21 @@
import java.util.List;
import java.util.Map;
+import javax.inject.Inject;
+
import static org.elasticsearch.gradle.util.FileUtils.mkdirs;
import static org.elasticsearch.gradle.util.GradleUtils.maybeConfigure;
/**
* Applies commonly used settings to all Test tasks in the project
*/
-public class ElasticsearchTestBasePlugin implements Plugin<Project> {
+public abstract class ElasticsearchTestBasePlugin implements Plugin<Project> {
public static final String DUMP_OUTPUT_ON_FAILURE_PROP_NAME = "dumpOutputOnFailure";
+ @Inject
+ protected abstract ProviderFactory getProviderFactory();
+
@Override
public void apply(Project project) {
project.getPluginManager().apply(GradleTestPolicySetupPlugin.class);
@@ -150,13 +156,11 @@ public void execute(Task t) {
// we use 'temp' relative to CWD since this is per JVM and tests are forbidden from writing to CWD
nonInputProperties.systemProperty("java.io.tmpdir", test.getWorkingDir().toPath().resolve("temp"));
+ test.systemProperties(getProviderFactory().systemPropertiesPrefixedBy("tests.").get());
+ test.systemProperties(getProviderFactory().systemPropertiesPrefixedBy("es.").get());
+
// TODO: remove setting logging level via system property
test.systemProperty("tests.logger.level", "WARN");
- System.getProperties().entrySet().forEach(entry -> {
- if ((entry.getKey().toString().startsWith("tests.") || entry.getKey().toString().startsWith("es."))) {
- test.systemProperty(entry.getKey().toString(), entry.getValue());
- }
- });
// TODO: remove this once ctx isn't added to update script params in 7.0
test.systemProperty("es.scripting.update.ctx_in_params", "false");
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/InternalDistributionModuleCheckTaskProvider.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/InternalDistributionModuleCheckTaskProvider.java
index 13f265388fe3f..a4412cd3db247 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/InternalDistributionModuleCheckTaskProvider.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/InternalDistributionModuleCheckTaskProvider.java
@@ -59,7 +59,6 @@ public class InternalDistributionModuleCheckTaskProvider {
"org.elasticsearch.plugin",
"org.elasticsearch.plugin.analysis",
"org.elasticsearch.pluginclassloader",
- "org.elasticsearch.preallocate",
"org.elasticsearch.securesm",
"org.elasticsearch.server",
"org.elasticsearch.simdvec",
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java
index 42834928bafed..b8ebb454ddb16 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java
@@ -51,6 +51,7 @@
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
+import java.util.Optional;
import java.util.Random;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
@@ -97,24 +98,25 @@ public void apply(Project project) {
JavaVersion minimumCompilerVersion = JavaVersion.toVersion(getResourceContents("/minimumCompilerVersion"));
JavaVersion minimumRuntimeVersion = JavaVersion.toVersion(getResourceContents("/minimumRuntimeVersion"));
- File runtimeJavaHome = findRuntimeJavaHome();
- boolean isRuntimeJavaHomeSet = Jvm.current().getJavaHome().equals(runtimeJavaHome) == false;
+ Optional<File> selectedRuntimeJavaHome = findRuntimeJavaHome();
+ File actualRuntimeJavaHome = selectedRuntimeJavaHome.orElse(Jvm.current().getJavaHome());
+ boolean isRuntimeJavaHomeSet = selectedRuntimeJavaHome.isPresent();
GitInfo gitInfo = GitInfo.gitInfo(project.getRootDir());
BuildParams.init(params -> {
params.reset();
- params.setRuntimeJavaHome(runtimeJavaHome);
+ params.setRuntimeJavaHome(actualRuntimeJavaHome);
params.setJavaToolChainSpec(resolveToolchainSpecFromEnv());
params.setRuntimeJavaVersion(
determineJavaVersion(
"runtime java.home",
- runtimeJavaHome,
+ actualRuntimeJavaHome,
isRuntimeJavaHomeSet ? minimumRuntimeVersion : Jvm.current().getJavaVersion()
)
);
params.setIsRuntimeJavaHomeSet(isRuntimeJavaHomeSet);
- JvmInstallationMetadata runtimeJdkMetaData = metadataDetector.getMetadata(getJavaInstallation(runtimeJavaHome));
+ JvmInstallationMetadata runtimeJdkMetaData = metadataDetector.getMetadata(getJavaInstallation(actualRuntimeJavaHome));
params.setRuntimeJavaDetails(formatJavaVendorDetails(runtimeJdkMetaData));
params.setJavaVersions(getAvailableJavaVersions());
params.setMinimumCompilerVersion(minimumCompilerVersion);
@@ -298,19 +300,19 @@ private static void assertMinimumCompilerVersion(JavaVersion minimumCompilerVers
}
}
- private File findRuntimeJavaHome() {
+ private Optional<File> findRuntimeJavaHome() {
String runtimeJavaProperty = System.getProperty("runtime.java");
if (runtimeJavaProperty != null) {
- return resolveJavaHomeFromToolChainService(runtimeJavaProperty);
+ return Optional.of(resolveJavaHomeFromToolChainService(runtimeJavaProperty));
}
String env = System.getenv("RUNTIME_JAVA_HOME");
if (env != null) {
- return new File(env);
+ return Optional.of(new File(env));
}
// fall back to tool chain if set.
env = System.getenv("JAVA_TOOLCHAIN_HOME");
- return env == null ? Jvm.current().getJavaHome() : new File(env);
+ return env == null ? Optional.empty() : Optional.of(new File(env));
}
@NotNull
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/DependencyLicensesTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/DependencyLicensesTask.java
index 0099a4616f829..07817fdaed1fe 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/DependencyLicensesTask.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/DependencyLicensesTask.java
@@ -23,11 +23,14 @@
import org.gradle.api.provider.Property;
import org.gradle.api.provider.Provider;
import org.gradle.api.specs.Spec;
+import org.gradle.api.tasks.CacheableTask;
import org.gradle.api.tasks.Input;
import org.gradle.api.tasks.InputDirectory;
import org.gradle.api.tasks.InputFiles;
import org.gradle.api.tasks.Optional;
import org.gradle.api.tasks.OutputDirectory;
+import org.gradle.api.tasks.PathSensitive;
+import org.gradle.api.tasks.PathSensitivity;
import org.gradle.api.tasks.TaskAction;
import java.io.File;
@@ -89,6 +92,7 @@
* for the dependency. This artifact will be redistributed by us with the release to
* comply with the license terms.
*/
+@CacheableTask
public abstract class DependencyLicensesTask extends DefaultTask {
private final Pattern regex = Pattern.compile("-v?\\d+.*");
@@ -149,6 +153,7 @@ public DependencyLicensesTask(ObjectFactory objects, ProjectLayout projectLayout
}
@InputFiles
+ @PathSensitive(PathSensitivity.NAME_ONLY)
public FileCollection getDependencies() {
return dependencies;
}
@@ -159,6 +164,7 @@ public void setDependencies(FileCollection dependencies) {
@Optional
@InputDirectory
+ @PathSensitive(PathSensitivity.RELATIVE)
public File getLicensesDir() {
File asFile = licensesDir.get().getAsFile();
if (asFile.exists()) {
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/SplitPackagesAuditTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/SplitPackagesAuditTask.java
index ec279589a6bed..f75adbe640297 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/SplitPackagesAuditTask.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/SplitPackagesAuditTask.java
@@ -20,6 +20,7 @@
import org.gradle.api.provider.MapProperty;
import org.gradle.api.provider.Property;
import org.gradle.api.provider.SetProperty;
+import org.gradle.api.tasks.CacheableTask;
import org.gradle.api.tasks.CompileClasspath;
import org.gradle.api.tasks.Input;
import org.gradle.api.tasks.InputFiles;
@@ -56,6 +57,7 @@
/**
* Checks for split packages with dependencies. These are not allowed in a future modularized world.
*/
+@CacheableTask
public class SplitPackagesAuditTask extends DefaultTask {
private static final Logger LOGGER = Logging.getLogger(SplitPackagesAuditTask.class);
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/AbstractVersionsTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/AbstractVersionsTask.java
index 0ab3a9b917d65..ad39faad1bc85 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/AbstractVersionsTask.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/AbstractVersionsTask.java
@@ -8,19 +8,119 @@
package org.elasticsearch.gradle.internal.release;
+import com.github.javaparser.GeneratedJavaParserConstants;
+import com.github.javaparser.ast.CompilationUnit;
+import com.github.javaparser.ast.body.ClassOrInterfaceDeclaration;
+import com.github.javaparser.ast.body.FieldDeclaration;
+import com.github.javaparser.ast.expr.IntegerLiteralExpr;
+import com.github.javaparser.ast.observer.ObservableProperty;
+import com.github.javaparser.printer.ConcreteSyntaxModel;
+import com.github.javaparser.printer.concretesyntaxmodel.CsmElement;
+import com.github.javaparser.printer.lexicalpreservation.LexicalPreservingPrinter;
+
import org.gradle.api.DefaultTask;
+import org.gradle.api.logging.Logger;
+import org.gradle.api.logging.Logging;
import org.gradle.initialization.layout.BuildLayout;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.nio.file.Files;
import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.List;
+import java.util.Map;
+import java.util.OptionalInt;
+import java.util.stream.Collectors;
+
+import static com.github.javaparser.ast.observer.ObservableProperty.TYPE_PARAMETERS;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmConditional.Condition.FLAG;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.block;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.child;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.comma;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.comment;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.conditional;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.list;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.newline;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.none;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.sequence;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.space;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.string;
+import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.token;
public abstract class AbstractVersionsTask extends DefaultTask {
+ static {
+ replaceDefaultJavaParserClassCsm();
+ }
+
+ /*
+ * The default JavaParser CSM which it uses to format any new declarations added to a class
+ * inserts two newlines after each declaration. Our version classes only have one newline.
+ * In order to get javaparser lexical printer to use our format, we have to completely replace
+ * the statically declared CSM pattern using hacky reflection
+ * to access the static map where these are stored, and insert a replacement that is identical
+ * apart from only one newline at the end of each member declaration, rather than two.
+ */
+ private static void replaceDefaultJavaParserClassCsm() {
+ try {
+ Field classCsms = ConcreteSyntaxModel.class.getDeclaredField("concreteSyntaxModelByClass");
+ classCsms.setAccessible(true);
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ Map<Class, CsmElement> csms = (Map) classCsms.get(null);
+
+ // copied from the static initializer in ConcreteSyntaxModel
+ csms.put(
+ ClassOrInterfaceDeclaration.class,
+ sequence(
+ comment(),
+ list(ObservableProperty.ANNOTATIONS, newline(), none(), newline()),
+ list(ObservableProperty.MODIFIERS, space(), none(), space()),
+ conditional(
+ ObservableProperty.INTERFACE,
+ FLAG,
+ token(GeneratedJavaParserConstants.INTERFACE),
+ token(GeneratedJavaParserConstants.CLASS)
+ ),
+ space(),
+ child(ObservableProperty.NAME),
+ list(
+ TYPE_PARAMETERS,
+ sequence(comma(), space()),
+ string(GeneratedJavaParserConstants.LT),
+ string(GeneratedJavaParserConstants.GT)
+ ),
+ list(
+ ObservableProperty.EXTENDED_TYPES,
+ sequence(string(GeneratedJavaParserConstants.COMMA), space()),
+ sequence(space(), token(GeneratedJavaParserConstants.EXTENDS), space()),
+ none()
+ ),
+ list(
+ ObservableProperty.IMPLEMENTED_TYPES,
+ sequence(string(GeneratedJavaParserConstants.COMMA), space()),
+ sequence(space(), token(GeneratedJavaParserConstants.IMPLEMENTS), space()),
+ none()
+ ),
+ space(),
+ block(sequence(newline(), list(ObservableProperty.MEMBERS, sequence(newline()/*, newline()*/), newline(), newline())))
+ )
+ );
+ } catch (ReflectiveOperationException e) {
+ throw new AssertionError(e);
+ }
+ }
+
+ private static final Logger LOGGER = Logging.getLogger(AbstractVersionsTask.class);
+
static final String TRANSPORT_VERSION_TYPE = "TransportVersion";
static final String INDEX_VERSION_TYPE = "IndexVersion";
static final String SERVER_MODULE_PATH = "server/src/main/java/";
- static final String TRANSPORT_VERSION_FILE_PATH = SERVER_MODULE_PATH + "org/elasticsearch/TransportVersions.java";
- static final String INDEX_VERSION_FILE_PATH = SERVER_MODULE_PATH + "org/elasticsearch/index/IndexVersions.java";
+
+ static final String VERSION_FILE_PATH = SERVER_MODULE_PATH + "org/elasticsearch/Version.java";
+ static final String TRANSPORT_VERSIONS_FILE_PATH = SERVER_MODULE_PATH + "org/elasticsearch/TransportVersions.java";
+ static final String INDEX_VERSIONS_FILE_PATH = SERVER_MODULE_PATH + "org/elasticsearch/index/IndexVersions.java";
static final String SERVER_RESOURCES_PATH = "server/src/main/resources/";
static final String TRANSPORT_VERSIONS_RECORD = SERVER_RESOURCES_PATH + "org/elasticsearch/TransportVersions.csv";
@@ -32,4 +132,34 @@ protected AbstractVersionsTask(BuildLayout layout) {
rootDir = layout.getRootDirectory().toPath();
}
+ static Map<String, Integer> splitVersionIds(List<String> version) {
+ return version.stream().map(l -> {
+ var split = l.split(":");
+ if (split.length != 2) throw new IllegalArgumentException("Invalid tag format [" + l + "]");
+ return split;
+ }).collect(Collectors.toMap(l -> l[0], l -> Integer.parseInt(l[1])));
+ }
+
+ static OptionalInt findSingleIntegerExpr(FieldDeclaration field) {
+ var ints = field.findAll(IntegerLiteralExpr.class);
+ switch (ints.size()) {
+ case 0 -> {
+ return OptionalInt.empty();
+ }
+ case 1 -> {
+ return OptionalInt.of(ints.get(0).asNumber().intValue());
+ }
+ default -> {
+ LOGGER.warn("Multiple integers found in version field declaration [{}]", field); // and ignore it
+ return OptionalInt.empty();
+ }
+ }
+ }
+
+ static void writeOutNewContents(Path file, CompilationUnit unit) throws IOException {
+ if (unit.containsData(LexicalPreservingPrinter.NODE_TEXT_DATA) == false) {
+ throw new IllegalArgumentException("CompilationUnit has no lexical information for output");
+ }
+ Files.writeString(file, LexicalPreservingPrinter.print(unit), StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING);
+ }
}
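A quick usage sketch for the shared helpers above (the ids are hypothetical, and the demo assumes it lives in the same package, since `splitVersionIds` is package-private):

    package org.elasticsearch.gradle.internal.release;

    import java.util.List;
    import java.util.Map;

    class SplitVersionIdsDemo {
        public static void main(String[] args) {
            // "<type>:<id>" entries, as passed to the tasks' command-line options.
            Map<String, Integer> ids = AbstractVersionsTask.splitVersionIds(
                List.of("TransportVersion:8702002", "IndexVersion:8505000"));
            System.out.println(ids); // e.g. {TransportVersion=8702002, IndexVersion=8505000}
            // An entry without a colon, e.g. "TransportVersion=8702002",
            // fails with IllegalArgumentException("Invalid tag format [...]").
        }
    }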
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ExtractCurrentVersionsTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ExtractCurrentVersionsTask.java
index 3530d7ef9e807..53dd55041f6bd 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ExtractCurrentVersionsTask.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ExtractCurrentVersionsTask.java
@@ -11,7 +11,6 @@
import com.github.javaparser.StaticJavaParser;
import com.github.javaparser.ast.CompilationUnit;
import com.github.javaparser.ast.body.FieldDeclaration;
-import com.github.javaparser.ast.expr.IntegerLiteralExpr;
import org.gradle.api.logging.Logger;
import org.gradle.api.logging.Logging;
@@ -53,11 +52,11 @@ public void executeTask() throws IOException {
LOGGER.lifecycle("Extracting latest version information");
List<String> output = new ArrayList<>();
- int transportVersion = readLatestVersion(rootDir.resolve(TRANSPORT_VERSION_FILE_PATH));
+ int transportVersion = readLatestVersion(rootDir.resolve(TRANSPORT_VERSIONS_FILE_PATH));
LOGGER.lifecycle("Transport version: {}", transportVersion);
output.add(TRANSPORT_VERSION_TYPE + ":" + transportVersion);
- int indexVersion = readLatestVersion(rootDir.resolve(INDEX_VERSION_FILE_PATH));
+ int indexVersion = readLatestVersion(rootDir.resolve(INDEX_VERSIONS_FILE_PATH));
LOGGER.lifecycle("Index version: {}", indexVersion);
output.add(INDEX_VERSION_TYPE + ":" + indexVersion);
@@ -74,21 +73,13 @@ Integer highestVersionId() {
@Override
public void accept(FieldDeclaration fieldDeclaration) {
- var ints = fieldDeclaration.findAll(IntegerLiteralExpr.class);
- switch (ints.size()) {
- case 0 -> {
- // No ints in the field declaration, ignore
+ findSingleIntegerExpr(fieldDeclaration).ifPresent(id -> {
+ if (highestVersionId != null && highestVersionId > id) {
+ LOGGER.warn("Version ids [{}, {}] out of order", highestVersionId, id);
+ } else {
+ highestVersionId = id;
}
- case 1 -> {
- int id = ints.get(0).asNumber().intValue();
- if (highestVersionId != null && highestVersionId > id) {
- LOGGER.warn("Version ids [{}, {}] out of order", highestVersionId, id);
- } else {
- highestVersionId = id;
- }
- }
- default -> LOGGER.warn("Multiple integers found in version field declaration [{}]", fieldDeclaration); // and ignore it
- }
+ });
}
}
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java
index 8001b82797557..08abb02ea831e 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java
@@ -52,6 +52,7 @@ public void apply(Project project) {
project.getTasks().register("extractCurrentVersions", ExtractCurrentVersionsTask.class);
project.getTasks().register("tagVersions", TagVersionsTask.class);
+ project.getTasks().register("setCompatibleVersions", SetCompatibleVersionsTask.class);
final FileTree yamlFiles = projectDirectory.dir("docs/changelog")
.getAsFileTree()
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java
new file mode 100644
index 0000000000000..15e0a0cc345d5
--- /dev/null
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.gradle.internal.release;
+
+import com.github.javaparser.StaticJavaParser;
+import com.github.javaparser.ast.CompilationUnit;
+import com.github.javaparser.ast.body.ClassOrInterfaceDeclaration;
+import com.github.javaparser.ast.expr.NameExpr;
+import com.github.javaparser.printer.lexicalpreservation.LexicalPreservingPrinter;
+
+import org.gradle.api.tasks.TaskAction;
+import org.gradle.api.tasks.options.Option;
+import org.gradle.initialization.layout.BuildLayout;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import javax.inject.Inject;
+
+public class SetCompatibleVersionsTask extends AbstractVersionsTask {
+
+ private Map<String, Integer> versionIds = Map.of();
+
+ @Inject
+ public SetCompatibleVersionsTask(BuildLayout layout) {
+ super(layout);
+ }
+
+ @Option(option = "version-id", description = "Version id used for the release. Of the form :.")
+ public void versionIds(List version) {
+ this.versionIds = splitVersionIds(version);
+ }
+
+ @TaskAction
+ public void executeTask() throws IOException {
+ if (versionIds.isEmpty()) {
+ throw new IllegalArgumentException("No version ids specified");
+ }
+ Integer transportVersion = versionIds.get(TRANSPORT_VERSION_TYPE);
+ if (transportVersion == null) {
+ throw new IllegalArgumentException("TransportVersion id not specified");
+ }
+
+ Path versionJava = rootDir.resolve(TRANSPORT_VERSIONS_FILE_PATH);
+ CompilationUnit file = LexicalPreservingPrinter.setup(StaticJavaParser.parse(versionJava));
+
+ Optional<CompilationUnit> modifiedFile;
+
+ modifiedFile = setMinimumCcsTransportVersion(file, transportVersion);
+
+ if (modifiedFile.isPresent()) {
+ writeOutNewContents(versionJava, modifiedFile.get());
+ }
+ }
+
+ static Optional<CompilationUnit> setMinimumCcsTransportVersion(CompilationUnit unit, int transportVersion) {
+ ClassOrInterfaceDeclaration transportVersions = unit.getClassByName("TransportVersions").get();
+
+ String tvConstantName = transportVersions.getFields().stream().filter(f -> {
+ var i = findSingleIntegerExpr(f);
+ return i.isPresent() && i.getAsInt() == transportVersion;
+ })
+ .map(f -> f.getVariable(0).getNameAsString())
+ .findFirst()
+ .orElseThrow(() -> new IllegalStateException("Could not find constant for id " + transportVersion));
+
+ transportVersions.getFieldByName("MINIMUM_CCS_VERSION")
+ .orElseThrow(() -> new IllegalStateException("Could not find MINIMUM_CCS_VERSION constant"))
+ .getVariable(0)
+ .setInitializer(new NameExpr(tvConstantName));
+
+ return Optional.of(unit);
+ }
+}
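Given the `setCompatibleVersions` registration in ReleaseToolsPlugin and the `version-id` option above, a plausible invocation (the id is hypothetical) is:

    ./gradlew setCompatibleVersions --version-id=TransportVersion:8702002

which locates the `TransportVersions` constant defined with that id and rewrites `MINIMUM_CCS_VERSION` to reference it.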
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/TagVersionsTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/TagVersionsTask.java
index fa11746543e82..a7f67f87b602e 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/TagVersionsTask.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/TagVersionsTask.java
@@ -47,11 +47,7 @@ public void release(String version) {
@Option(option = "tag-version", description = "Version id to tag. Of the form :.")
public void tagVersions(List version) {
- this.tagVersions = version.stream().map(l -> {
- var split = l.split(":");
- if (split.length != 2) throw new IllegalArgumentException("Invalid tag format [" + l + "]");
- return split;
- }).collect(Collectors.toMap(l -> l[0], l -> Integer.parseInt(l[1])));
+ this.tagVersions = splitVersionIds(version);
}
@TaskAction
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/UpdateVersionsTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/UpdateVersionsTask.java
index 9996ffe613545..b19e5c0beacf8 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/UpdateVersionsTask.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/UpdateVersionsTask.java
@@ -8,7 +8,6 @@
package org.elasticsearch.gradle.internal.release;
-import com.github.javaparser.GeneratedJavaParserConstants;
import com.github.javaparser.StaticJavaParser;
import com.github.javaparser.ast.CompilationUnit;
import com.github.javaparser.ast.NodeList;
@@ -16,14 +15,10 @@
import com.github.javaparser.ast.body.FieldDeclaration;
import com.github.javaparser.ast.body.VariableDeclarator;
import com.github.javaparser.ast.expr.NameExpr;
-import com.github.javaparser.ast.observer.ObservableProperty;
-import com.github.javaparser.printer.ConcreteSyntaxModel;
-import com.github.javaparser.printer.concretesyntaxmodel.CsmElement;
import com.github.javaparser.printer.lexicalpreservation.LexicalPreservingPrinter;
import com.google.common.annotations.VisibleForTesting;
import org.elasticsearch.gradle.Version;
-import org.gradle.api.DefaultTask;
import org.gradle.api.logging.Logger;
import org.gradle.api.logging.Logging;
import org.gradle.api.tasks.TaskAction;
@@ -31,10 +26,7 @@
import org.gradle.initialization.layout.BuildLayout;
import java.io.IOException;
-import java.lang.reflect.Field;
-import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Objects;
@@ -47,93 +39,12 @@
import javax.annotation.Nullable;
import javax.inject.Inject;
-import static com.github.javaparser.ast.observer.ObservableProperty.TYPE_PARAMETERS;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmConditional.Condition.FLAG;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.block;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.child;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.comma;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.comment;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.conditional;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.list;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.newline;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.none;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.sequence;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.space;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.string;
-import static com.github.javaparser.printer.concretesyntaxmodel.CsmElement.token;
-
-public class UpdateVersionsTask extends DefaultTask {
-
- static {
- replaceDefaultJavaParserClassCsm();
- }
-
- /*
- * The default JavaParser CSM which it uses to format any new declarations added to a class
- * inserts two newlines after each declaration. Our version classes only have one newline.
- * In order to get javaparser lexical printer to use our format, we have to completely replace
- * the statically declared CSM pattern using hacky reflection
- * to access the static map where these are stored, and insert a replacement that is identical
- * apart from only one newline at the end of each member declaration, rather than two.
- */
- private static void replaceDefaultJavaParserClassCsm() {
- try {
- Field classCsms = ConcreteSyntaxModel.class.getDeclaredField("concreteSyntaxModelByClass");
- classCsms.setAccessible(true);
- @SuppressWarnings({ "unchecked", "rawtypes" })
- Map<Class, CsmElement> csms = (Map) classCsms.get(null);
-
- // copied from the static initializer in ConcreteSyntaxModel
- csms.put(
- ClassOrInterfaceDeclaration.class,
- sequence(
- comment(),
- list(ObservableProperty.ANNOTATIONS, newline(), none(), newline()),
- list(ObservableProperty.MODIFIERS, space(), none(), space()),
- conditional(
- ObservableProperty.INTERFACE,
- FLAG,
- token(GeneratedJavaParserConstants.INTERFACE),
- token(GeneratedJavaParserConstants.CLASS)
- ),
- space(),
- child(ObservableProperty.NAME),
- list(
- TYPE_PARAMETERS,
- sequence(comma(), space()),
- string(GeneratedJavaParserConstants.LT),
- string(GeneratedJavaParserConstants.GT)
- ),
- list(
- ObservableProperty.EXTENDED_TYPES,
- sequence(string(GeneratedJavaParserConstants.COMMA), space()),
- sequence(space(), token(GeneratedJavaParserConstants.EXTENDS), space()),
- none()
- ),
- list(
- ObservableProperty.IMPLEMENTED_TYPES,
- sequence(string(GeneratedJavaParserConstants.COMMA), space()),
- sequence(space(), token(GeneratedJavaParserConstants.IMPLEMENTS), space()),
- none()
- ),
- space(),
- block(sequence(newline(), list(ObservableProperty.MEMBERS, sequence(newline()/*, newline()*/), newline(), newline())))
- )
- );
- } catch (ReflectiveOperationException e) {
- throw new AssertionError(e);
- }
- }
+public class UpdateVersionsTask extends AbstractVersionsTask {
private static final Logger LOGGER = Logging.getLogger(UpdateVersionsTask.class);
- static final String SERVER_MODULE_PATH = "server/src/main/java/";
- static final String VERSION_FILE_PATH = SERVER_MODULE_PATH + "org/elasticsearch/Version.java";
-
static final Pattern VERSION_FIELD = Pattern.compile("V_(\\d+)_(\\d+)_(\\d+)(?:_(\\w+))?");
- final Path rootDir;
-
@Nullable
private Version addVersion;
private boolean setCurrent;
@@ -142,7 +53,7 @@ private static void replaceDefaultJavaParserClassCsm() {
@Inject
public UpdateVersionsTask(BuildLayout layout) {
- rootDir = layout.getRootDirectory().toPath();
+ super(layout);
}
@Option(option = "add-version", description = "Specifies the version to add")
@@ -287,11 +198,4 @@ static Optional<CompilationUnit> removeVersionConstant(CompilationUnit versionJa
return Optional.of(versionJava);
}
-
- static void writeOutNewContents(Path file, CompilationUnit unit) throws IOException {
- if (unit.containsData(LexicalPreservingPrinter.NODE_TEXT_DATA) == false) {
- throw new IllegalArgumentException("CompilationUnit has no lexical information for output");
- }
- Files.writeString(file, LexicalPreservingPrinter.print(unit), StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING);
- }
}
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/OracleOpenJdkToolchainResolver.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/OracleOpenJdkToolchainResolver.java
index d0c7e9316d996..ec86798e653f1 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/OracleOpenJdkToolchainResolver.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/OracleOpenJdkToolchainResolver.java
@@ -88,7 +88,7 @@ public String url(String os, String arch, String extension) {
List<JdkBuild> builds = List.of(
getBundledJdkBuild(),
// 23 early access
- new EarlyAccessJdkBuild(JavaLanguageVersion.of(23), "23", "23")
+ new EarlyAccessJdkBuild(JavaLanguageVersion.of(23), "23", "24")
);
private JdkBuild getBundledJdkBuild() {
diff --git a/build-tools-internal/src/test/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTaskTests.java b/build-tools-internal/src/test/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTaskTests.java
new file mode 100644
index 0000000000000..eecb953a44eb6
--- /dev/null
+++ b/build-tools-internal/src/test/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTaskTests.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.gradle.internal.release;
+
+import com.github.javaparser.StaticJavaParser;
+import com.github.javaparser.ast.CompilationUnit;
+
+import org.junit.Test;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.hasToString;
+
+public class SetCompatibleVersionsTaskTests {
+
+ @Test
+ public void updateMinCcsVersion() {
+ final String transportVersionsJava = """
+ public class TransportVersions {
+ public static final TransportVersion V1 = def(100);
+ public static final TransportVersion V2 = def(200);
+ public static final TransportVersion V3 = def(300);
+
+ public static final TransportVersion MINIMUM_CCS_VERSION = V2;
+ }""";
+ final String updatedJava = """
+ public class TransportVersions {
+
+ public static final TransportVersion V1 = def(100);
+
+ public static final TransportVersion V2 = def(200);
+
+ public static final TransportVersion V3 = def(300);
+
+ public static final TransportVersion MINIMUM_CCS_VERSION = V3;
+ }
+ """;
+
+ CompilationUnit unit = StaticJavaParser.parse(transportVersionsJava);
+
+ SetCompatibleVersionsTask.setMinimumCcsTransportVersion(unit, 300);
+
+ assertThat(unit, hasToString(updatedJava));
+ }
+}
diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index 12417239cc7dc..1dd9fb95bd17b 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -1,5 +1,5 @@
-elasticsearch = 8.15.0
-lucene = 9.11.0
+elasticsearch = 8.16.0
+lucene = 9.11.1
bundled_jdk_vendor = openjdk
bundled_jdk = 22.0.1+8@c7ec1332f7bb44aeba2eb341ae18aca4
@@ -49,7 +49,7 @@ commonsCompress = 1.24.0
reflections = 0.10.2
# benchmark dependencies
-jmh = 1.26
+jmh = 1.37
# test dependencies
# when updating this version, also update :qa:evil-tests
diff --git a/distribution/build.gradle b/distribution/build.gradle
index 77f1a2d032c73..47367ab0261a2 100644
--- a/distribution/build.gradle
+++ b/distribution/build.gradle
@@ -280,8 +280,6 @@ configure(subprojects.findAll { ['archives', 'packages'].contains(it.name) }) {
dependencies {
libs project(':server')
- // this is a special circumstance of a jar that is not a dependency of server, but needs to be in the module path
- libs project(':libs:elasticsearch-preallocate')
libsVersionChecker project(':distribution:tools:java-version-checker')
libsCliLauncher project(':distribution:tools:cli-launcher')
diff --git a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java
index 298b4671582b5..2a89f18209d11 100644
--- a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java
+++ b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java
@@ -69,11 +69,6 @@ static List<String> systemJvmOptions(Settings nodeSettings, final Map
+ {
+ "description": "Grants user access to some indicies.",
+ "indices": [
+ {"names": ["index1", "index2" ], "privileges": ["all"], "field_security" : {"grant" : [ "title", "body" ]}}
+ ],
+ "metadata" : {"version": 1}
+ }
+
'''
setups['jacknich_user'] = '''
- do:
diff --git a/docs/changelog/106252.yaml b/docs/changelog/106252.yaml
new file mode 100644
index 0000000000000..5e3f084632b9d
--- /dev/null
+++ b/docs/changelog/106252.yaml
@@ -0,0 +1,6 @@
+pr: 106252
+summary: Add min/max range of the `event.ingested` field to cluster state for searchable
+ snapshots
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/changelog/106253.yaml b/docs/changelog/106253.yaml
deleted file mode 100644
index b80cda37f63c7..0000000000000
--- a/docs/changelog/106253.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-pr: 106253
-summary: Fix for from parameter when using `sub_searches` and rank
-area: Ranking
-type: bug
-issues:
- - 99011
diff --git a/docs/changelog/106520.yaml b/docs/changelog/106520.yaml
new file mode 100644
index 0000000000000..c3fe69a4c3dbd
--- /dev/null
+++ b/docs/changelog/106520.yaml
@@ -0,0 +1,6 @@
+pr: 106520
+summary: Updated the transport CA name in Security Auto-Configuration.
+area: Security
+type: bug
+issues:
+ - 106455
diff --git a/docs/changelog/107047.yaml b/docs/changelog/107047.yaml
new file mode 100644
index 0000000000000..89caed6f55074
--- /dev/null
+++ b/docs/changelog/107047.yaml
@@ -0,0 +1,6 @@
+pr: 107047
+summary: "Search/Mapping: KnnVectorQueryBuilder support for allowUnmappedFields"
+area: Search
+type: bug
+issues:
+ - 106846
diff --git a/docs/changelog/107191.yaml b/docs/changelog/107191.yaml
new file mode 100644
index 0000000000000..5ef6297c0f3f1
--- /dev/null
+++ b/docs/changelog/107191.yaml
@@ -0,0 +1,17 @@
+pr: 107191
+summary: Stricter failure handling in multi-repo get-snapshots request handling
+area: Snapshot/Restore
+type: bug
+issues: []
+highlight:
+ title: Stricter failure handling in multi-repo get-snapshots request handling
+ body: |
+ If a multi-repo get-snapshots request encounters a failure in one of the
+ targeted repositories then earlier versions of Elasticsearch would proceed
+ as if the faulty repository did not exist, except for a per-repository
+ failure report in a separate section of the response body. This makes it
+ impossible to paginate the results properly in the presence of failures. In
+ versions 8.15.0 and later this API's failure handling behaviour has been
+ made stricter, reporting an overall failure if any targeted repository's
+ contents cannot be listed.
+ notable: true
diff --git a/docs/changelog/107415.yaml b/docs/changelog/107415.yaml
new file mode 100644
index 0000000000000..8877d0426c60d
--- /dev/null
+++ b/docs/changelog/107415.yaml
@@ -0,0 +1,6 @@
+pr: 107415
+summary: Fix `DecayFunctions'` `toString`
+area: Search
+type: bug
+issues:
+ - 100870
diff --git a/docs/changelog/108395.yaml b/docs/changelog/108395.yaml
new file mode 100644
index 0000000000000..c33cf169a99fa
--- /dev/null
+++ b/docs/changelog/108395.yaml
@@ -0,0 +1,5 @@
+pr: 108395
+summary: "ESQL: change from quoting from backtick to quote"
+area: ES|QL
+type: bug
+issues: []
diff --git a/docs/changelog/108606.yaml b/docs/changelog/108606.yaml
new file mode 100644
index 0000000000000..04780bff58800
--- /dev/null
+++ b/docs/changelog/108606.yaml
@@ -0,0 +1,14 @@
+pr: 108606
+summary: "Extend ISO8601 datetime parser to specify forbidden fields, allowing it to be used\
+ \ on more formats"
+area: Infra/Core
+type: enhancement
+issues: []
+highlight:
+ title: New custom parser for more ISO-8601 date formats
+ body: |-
+ Following on from #106486, this extends the custom ISO-8601 datetime parser to cover the `strict_year`,
+ `strict_year_month`, `strict_date_time`, `strict_date_time_no_millis`, `strict_date_hour_minute_second`,
+ `strict_date_hour_minute_second_millis`, and `strict_date_hour_minute_second_fraction` date formats.
+ As before, the parser will use the existing java.time parser if there are parsing issues, and the
+ `es.datetime.java_time_parsers=true` JVM property will force the use of the old parsers regardless.
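For orientation, a java.time sketch of the shape of two of the newly covered formats (the patterns are approximations of the named formats, not taken from the Elasticsearch source):

    import java.time.format.DateTimeFormatter;

    class IsoFormatShapes {
        public static void main(String[] args) {
            // Roughly "strict_date_time_no_millis": date, 'T', time, mandatory offset.
            System.out.println(DateTimeFormatter.ofPattern("uuuu-MM-dd'T'HH:mm:ssXXX")
                .parse("2024-06-12T08:30:00Z"));
            // Roughly "strict_year_month": four-digit year and two-digit month.
            System.out.println(DateTimeFormatter.ofPattern("uuuu-MM").parse("2024-06"));
        }
    }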
diff --git a/docs/changelog/108733.yaml b/docs/changelog/108733.yaml
new file mode 100644
index 0000000000000..76a969219ea4c
--- /dev/null
+++ b/docs/changelog/108733.yaml
@@ -0,0 +1,5 @@
+pr: 108733
+summary: Query Roles API
+area: Security
+type: feature
+issues: []
diff --git a/docs/changelog/108764.yaml b/docs/changelog/108764.yaml
new file mode 100644
index 0000000000000..94de27eb52c9b
--- /dev/null
+++ b/docs/changelog/108764.yaml
@@ -0,0 +1,6 @@
+pr: 108764
+summary: ST_DISTANCE Function
+area: ES|QL
+type: enhancement
+issues:
+ - 108212
diff --git a/docs/changelog/109084.yaml b/docs/changelog/109084.yaml
new file mode 100644
index 0000000000000..67ff5610c5a66
--- /dev/null
+++ b/docs/changelog/109084.yaml
@@ -0,0 +1,5 @@
+pr: 109084
+summary: Add AVX-512 optimised vector distance functions for int7 on x64
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/changelog/109341.yaml b/docs/changelog/109341.yaml
deleted file mode 100644
index 0c1eaa98a8aa2..0000000000000
--- a/docs/changelog/109341.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-pr: 109341
-summary: Re-define `index.mapper.dynamic` setting in 8.x for a better 7.x to 8.x upgrade if this setting is used.
-area: Mapping
-type: bug
-issues: []
diff --git a/docs/changelog/109395.yaml b/docs/changelog/109395.yaml
new file mode 100644
index 0000000000000..e5336695afa48
--- /dev/null
+++ b/docs/changelog/109395.yaml
@@ -0,0 +1,5 @@
+pr: 109395
+summary: Correct positioning for unique token filter
+area: Analysis
+type: bug
+issues: []
diff --git a/docs/changelog/109492.yaml b/docs/changelog/109492.yaml
deleted file mode 100644
index d4d1e83eb7786..0000000000000
--- a/docs/changelog/109492.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-pr: 109492
-summary: Add hexstring support byte painless scorers
-area: Search
-type: bug
-issues: []
diff --git a/docs/changelog/109500.yaml b/docs/changelog/109500.yaml
deleted file mode 100644
index cfd6bc770d5d6..0000000000000
--- a/docs/changelog/109500.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-pr: 109500
-summary: Guard file settings readiness on file settings support
-area: Infra/Settings
-type: bug
-issues: []
diff --git a/docs/changelog/109501.yaml b/docs/changelog/109501.yaml
new file mode 100644
index 0000000000000..6e81f98816cbf
--- /dev/null
+++ b/docs/changelog/109501.yaml
@@ -0,0 +1,14 @@
+pr: 109501
+summary: Reflect latest changes in synthetic source documentation
+area: Mapping
+type: enhancement
+issues: []
+highlight:
+ title: Synthetic `_source` improvements
+ body: |-
+ There are multiple improvements to synthetic `_source` functionality:
+
+ * Synthetic `_source` is now supported for all field types including `nested` and `object`. `object` fields are supported with `enabled` set to `false`.
+
+ * Synthetic `_source` can be enabled together with `ignore_malformed` and `ignore_above` parameters for all field types that support them.
+ notable: false
diff --git a/docs/changelog/109533.yaml b/docs/changelog/109533.yaml
deleted file mode 100644
index 5720410e5f370..0000000000000
--- a/docs/changelog/109533.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-pr: 109533
-summary: Fix IndexOutOfBoundsException during inference
-area: Machine Learning
-type: bug
-issues: []
diff --git a/docs/changelog/109629.yaml b/docs/changelog/109629.yaml
deleted file mode 100644
index c468388117b72..0000000000000
--- a/docs/changelog/109629.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-pr: 109629
-summary: "[Data streams] Fix the description of the lazy rollover task"
-area: Data streams
-type: bug
-issues: []
diff --git a/docs/changelog/109632.yaml b/docs/changelog/109632.yaml
deleted file mode 100644
index 6b04160bbdbec..0000000000000
--- a/docs/changelog/109632.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-pr: 109632
-summary: Force execute inactive sink reaper
-area: ES|QL
-type: bug
-issues: []
diff --git a/docs/changelog/109636.yaml b/docs/changelog/109636.yaml
deleted file mode 100644
index f8f73a75dfd3d..0000000000000
--- a/docs/changelog/109636.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-pr: 109636
-summary: "Ensure a lazy rollover request will rollover the target data stream once."
-area: Data streams
-type: bug
-issues: []
diff --git a/docs/changelog/109667.yaml b/docs/changelog/109667.yaml
new file mode 100644
index 0000000000000..782a1b1cf6c9b
--- /dev/null
+++ b/docs/changelog/109667.yaml
@@ -0,0 +1,5 @@
+pr: 109667
+summary: Inference autoscaling
+area: Machine Learning
+type: feature
+issues: []
diff --git a/docs/changelog/109684.yaml b/docs/changelog/109684.yaml
new file mode 100644
index 0000000000000..156f568290cf5
--- /dev/null
+++ b/docs/changelog/109684.yaml
@@ -0,0 +1,5 @@
+pr: 109684
+summary: Avoid `ModelAssignment` deadlock
+area: Machine Learning
+type: bug
+issues: []
diff --git a/docs/changelog/109695.yaml b/docs/changelog/109695.yaml
deleted file mode 100644
index f922b76412676..0000000000000
--- a/docs/changelog/109695.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-pr: 109695
-summary: Fix ESQL cancellation for exchange requests
-area: ES|QL
-type: bug
-issues: []
diff --git a/docs/changelog/109807.yaml b/docs/changelog/109807.yaml
new file mode 100644
index 0000000000000..5cf8a2c896c4e
--- /dev/null
+++ b/docs/changelog/109807.yaml
@@ -0,0 +1,6 @@
+pr: 109807
+summary: "ESQL: Fix LOOKUP attribute shadowing"
+area: ES|QL
+type: bug
+issues:
+ - 109392
diff --git a/docs/changelog/109813.yaml b/docs/changelog/109813.yaml
new file mode 100644
index 0000000000000..edcef17e87606
--- /dev/null
+++ b/docs/changelog/109813.yaml
@@ -0,0 +1,5 @@
+pr: 109813
+summary: Add text similarity reranker retriever
+area: Ranking
+type: feature
+issues: []
diff --git a/docs/changelog/109824.yaml b/docs/changelog/109824.yaml
deleted file mode 100644
index 987e8c0a8b1a2..0000000000000
--- a/docs/changelog/109824.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-pr: 109824
-summary: Check array size before returning array item in script doc values
-area: Infra/Scripting
-type: bug
-issues:
- - 104998
diff --git a/docs/changelog/109850.yaml b/docs/changelog/109850.yaml
deleted file mode 100644
index 0f11318765aea..0000000000000
--- a/docs/changelog/109850.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-pr: 109850
-summary: Ensure tasks preserve versions in `MasterService`
-area: Cluster Coordination
-type: bug
-issues: []
diff --git a/docs/changelog/109873.yaml b/docs/changelog/109873.yaml
new file mode 100644
index 0000000000000..c77197cc22d0a
--- /dev/null
+++ b/docs/changelog/109873.yaml
@@ -0,0 +1,5 @@
+pr: 109873
+summary: "ESQL: add Arrow dataframes output format"
+area: ES|QL
+type: feature
+issues: []
diff --git a/docs/changelog/109876.yaml b/docs/changelog/109876.yaml
new file mode 100644
index 0000000000000..4a65b4e17c4a3
--- /dev/null
+++ b/docs/changelog/109876.yaml
@@ -0,0 +1,6 @@
+pr: 109876
+summary: Always pick the user `maxPageSize` value
+area: Transform
+type: bug
+issues:
+ - 109844
diff --git a/docs/changelog/109880.yaml b/docs/changelog/109880.yaml
new file mode 100644
index 0000000000000..71c7209824a8a
--- /dev/null
+++ b/docs/changelog/109880.yaml
@@ -0,0 +1,10 @@
+pr: 109880
+summary: Deprecate `text_expansion` and `weighted_tokens` queries
+area: Machine Learning
+type: deprecation
+issues: [ ]
+deprecation:
+ title: Deprecate `text_expansion` and `weighted_tokens` queries
+ area: REST API
+ details: The `text_expansion` and `weighted_tokens` queries have been replaced by `sparse_vector`.
+ impact: Please update your existing `text_expansion` and `weighted_tokens` queries to use `sparse_vector`.
diff --git a/docs/changelog/109893.yaml b/docs/changelog/109893.yaml
new file mode 100644
index 0000000000000..df6d6e51236c8
--- /dev/null
+++ b/docs/changelog/109893.yaml
@@ -0,0 +1,5 @@
+pr: 109893
+summary: Add Anthropic messages integration to Inference API
+area: Machine Learning
+type: enhancement
+issues: [ ]
diff --git a/docs/changelog/109908.yaml b/docs/changelog/109908.yaml
new file mode 100644
index 0000000000000..cdf2acf17096c
--- /dev/null
+++ b/docs/changelog/109908.yaml
@@ -0,0 +1,5 @@
+pr: 109908
+summary: "Update checkpoints after post-replication actions, even on failure"
+area: CRUD
+type: bug
+issues: []
diff --git a/docs/changelog/109957.yaml b/docs/changelog/109957.yaml
new file mode 100644
index 0000000000000..6bbcd8175501c
--- /dev/null
+++ b/docs/changelog/109957.yaml
@@ -0,0 +1,6 @@
+pr: 109957
+summary: Add request metric to `RestController` to track success/failure (by status
+ code)
+area: Infra/Metrics
+type: enhancement
+issues: []
diff --git a/docs/changelog/109963.yaml b/docs/changelog/109963.yaml
new file mode 100644
index 0000000000000..1745d549582d4
--- /dev/null
+++ b/docs/changelog/109963.yaml
@@ -0,0 +1,6 @@
+pr: 109963
+summary: Propagate mapper builder context flags across nested mapper builder context
+ creation
+area: Mapping
+type: bug
+issues: []
diff --git a/docs/changelog/109967.yaml b/docs/changelog/109967.yaml
new file mode 100644
index 0000000000000..cfc6b6462954b
--- /dev/null
+++ b/docs/changelog/109967.yaml
@@ -0,0 +1,5 @@
+pr: 109967
+summary: Default the HF service to cosine similarity
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/changelog/109981.yaml b/docs/changelog/109981.yaml
new file mode 100644
index 0000000000000..cf9388f79e29c
--- /dev/null
+++ b/docs/changelog/109981.yaml
@@ -0,0 +1,5 @@
+pr: 109981
+summary: Limit number of synonym rules that can be created
+area: Mapping
+type: bug
+issues: [108785]
diff --git a/docs/changelog/109989.yaml b/docs/changelog/109989.yaml
new file mode 100644
index 0000000000000..f1f5972b60eb3
--- /dev/null
+++ b/docs/changelog/109989.yaml
@@ -0,0 +1,5 @@
+pr: 109989
+summary: "ESQL: Fix Join references"
+area: ES|QL
+type: bug
+issues: []
diff --git a/docs/changelog/109993.yaml b/docs/changelog/109993.yaml
new file mode 100644
index 0000000000000..40d161b6b5c24
--- /dev/null
+++ b/docs/changelog/109993.yaml
@@ -0,0 +1,5 @@
+pr: 109993
+summary: "[ES|QL] `weighted_avg`"
+area: ES|QL
+type: enhancement
+issues: []
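Editor's note: a short sketch of the new aggregation, assuming a `WEIGHTED_AVG(value, weight)` signature and the `employees` test data set used throughout these docs:

```
POST /_query
{
  "query": "FROM employees | STATS w_avg = WEIGHTED_AVG(salary, height) BY languages | SORT languages"
}
```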
diff --git a/docs/changelog/110004.yaml b/docs/changelog/110004.yaml
new file mode 100644
index 0000000000000..f680016527a9c
--- /dev/null
+++ b/docs/changelog/110004.yaml
@@ -0,0 +1,11 @@
+pr: 110004
+summary: Mark Query Rules as GA
+area: Relevance
+type: feature
+issues: []
+highlight:
+ title: Mark Query Rules as GA
+ body: |-
+ This PR marks query rules as Generally Available. All APIs are no longer
+ in tech preview.
+ notable: true
diff --git a/docs/changelog/110016.yaml b/docs/changelog/110016.yaml
new file mode 100644
index 0000000000000..28ad55aa796c8
--- /dev/null
+++ b/docs/changelog/110016.yaml
@@ -0,0 +1,5 @@
+pr: 110016
+summary: Opt keyword fields into fallback synthetic source if needed
+area: Mapping
+type: enhancement
+issues: []
diff --git a/docs/changelog/110019.yaml b/docs/changelog/110019.yaml
new file mode 100644
index 0000000000000..632e79008d351
--- /dev/null
+++ b/docs/changelog/110019.yaml
@@ -0,0 +1,6 @@
+pr: 110019
+summary: Improve mechanism for extracting the result of a `PlainActionFuture`
+area: Distributed
+type: enhancement
+issues:
+ - 108125
diff --git a/docs/changelog/110021.yaml b/docs/changelog/110021.yaml
new file mode 100644
index 0000000000000..51878b960dfd0
--- /dev/null
+++ b/docs/changelog/110021.yaml
@@ -0,0 +1,6 @@
+pr: 110021
+summary: "[ES|QL] validate `mv_sort` order"
+area: ES|QL
+type: bug
+issues:
+ - 109910
diff --git a/docs/changelog/110046.yaml b/docs/changelog/110046.yaml
new file mode 100644
index 0000000000000..6ebe440e7aced
--- /dev/null
+++ b/docs/changelog/110046.yaml
@@ -0,0 +1,6 @@
+pr: 110046
+summary: "ESQL: make named params objects truly per request"
+area: ES|QL
+type: bug
+issues:
+ - 110028
diff --git a/docs/changelog/110059.yaml b/docs/changelog/110059.yaml
new file mode 100644
index 0000000000000..ba160c091cdc2
--- /dev/null
+++ b/docs/changelog/110059.yaml
@@ -0,0 +1,32 @@
+pr: 110059
+summary: Adds new `bit` `element_type` for `dense_vectors`
+area: Vector Search
+type: feature
+issues: []
+highlight:
+ title: Adds new `bit` `element_type` for `dense_vectors`
+ body: |-
+ This adds `bit` vector support by adding `element_type: bit` for
+ vectors. This new element type works for indexed and non-indexed
+ vectors. Additionally, it works with `hnsw` and `flat` index types. No
+ quantization-based codec works with this element type; this is
+ consistent with `byte` vectors.
+
+ `bit` vectors accept up to `32768` dimensions in size and expect vectors
+ that are being indexed to be encoded either as a hexadecimal string or a
+ `byte[]` array where each element of the `byte` array represents `8`
+ bits of the vector.
+
+ `bit` vectors support script usage and regular query usage. When
+ indexed, all comparisons done are `xor` and `popcount` summations (i.e.,
+ Hamming distance), and the scores are transformed and normalized given
+ the vector dimensions.
+
+ For scripts, `l1norm` is the same as `hamming` distance and `l2norm` is
+ `sqrt(l1norm)`. `dotProduct` and `cosineSimilarity` are not supported.
+
+ Note, the dimensions expected by this `element_type` must always be
+ divisible by `8`, and the `byte[]` vectors provided for indexing must
+ have size `dim/8`, where each byte element represents `8` bits of
+ the vector.
+ notable: true
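Editor's note: a minimal sketch of the mapping and an indexing request for the new element type; the index and field names are hypothetical. Per the highlight above, `dims` must be divisible by 8, and a hex-encoded value supplies `dims/8` bytes (here 64 bits, so 8 bytes, so 16 hex characters):

```
PUT my-bit-vectors
{
  "mappings": {
    "properties": {
      "vector": {
        "type": "dense_vector",
        "element_type": "bit",
        "dims": 64
      }
    }
  }
}

PUT my-bit-vectors/_doc/1
{
  "vector": "ffff0000ffff0000"
}
```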
diff --git a/docs/changelog/110061.yaml b/docs/changelog/110061.yaml
new file mode 100644
index 0000000000000..1880a2a197722
--- /dev/null
+++ b/docs/changelog/110061.yaml
@@ -0,0 +1,6 @@
+pr: 110061
+summary: Avoid running watch jobs in `TickerScheduleTriggerEngine` if it is paused
+area: Watcher
+type: bug
+issues:
+ - 105933
diff --git a/docs/changelog/110066.yaml b/docs/changelog/110066.yaml
new file mode 100644
index 0000000000000..920c6304b63ae
--- /dev/null
+++ b/docs/changelog/110066.yaml
@@ -0,0 +1,6 @@
+pr: 110066
+summary: Support flattened fields and multi-fields as dimensions in downsampling
+area: Downsampling
+type: bug
+issues:
+ - 99297
diff --git a/docs/changelog/110096.yaml b/docs/changelog/110096.yaml
new file mode 100644
index 0000000000000..3d6616c289266
--- /dev/null
+++ b/docs/changelog/110096.yaml
@@ -0,0 +1,6 @@
+pr: 110096
+summary: Fix `ClassCastException` with MV_EXPAND on missing field
+area: ES|QL
+type: bug
+issues:
+ - 109974
diff --git a/docs/changelog/110102.yaml b/docs/changelog/110102.yaml
new file mode 100644
index 0000000000000..d1b9b53e2dfc5
--- /dev/null
+++ b/docs/changelog/110102.yaml
@@ -0,0 +1,6 @@
+pr: 110102
+summary: Optimize ST_DISTANCE filtering with Lucene circle intersection query
+area: ES|QL
+type: enhancement
+issues:
+ - 109972
diff --git a/docs/changelog/110112.yaml b/docs/changelog/110112.yaml
new file mode 100644
index 0000000000000..eca5fd9af15ce
--- /dev/null
+++ b/docs/changelog/110112.yaml
@@ -0,0 +1,5 @@
+pr: 110112
+summary: Increase response size limit for batched requests
+area: Machine Learning
+type: bug
+issues: []
diff --git a/docs/changelog/110146.yaml b/docs/changelog/110146.yaml
new file mode 100644
index 0000000000000..61ba35cec319b
--- /dev/null
+++ b/docs/changelog/110146.yaml
@@ -0,0 +1,5 @@
+pr: 110146
+summary: Fix trailing slash in `ml.get_categories` specification
+area: Machine Learning
+type: bug
+issues: []
diff --git a/docs/changelog/110160.yaml b/docs/changelog/110160.yaml
new file mode 100644
index 0000000000000..0c38c23c69067
--- /dev/null
+++ b/docs/changelog/110160.yaml
@@ -0,0 +1,5 @@
+pr: 110160
+summary: Opt in number fields into fallback synthetic source when doc values a…
+area: Mapping
+type: enhancement
+issues: []
diff --git a/docs/changelog/110176.yaml b/docs/changelog/110176.yaml
new file mode 100644
index 0000000000000..ae1d7d10d6dc4
--- /dev/null
+++ b/docs/changelog/110176.yaml
@@ -0,0 +1,5 @@
+pr: 110176
+summary: Fix trailing slash in two rollup specifications
+area: Rollup
+type: bug
+issues: []
diff --git a/docs/changelog/110177.yaml b/docs/changelog/110177.yaml
new file mode 100644
index 0000000000000..0ac5328d88df4
--- /dev/null
+++ b/docs/changelog/110177.yaml
@@ -0,0 +1,5 @@
+pr: 110177
+summary: Fix trailing slash in `security.put_privileges` specification
+area: Authorization
+type: bug
+issues: []
diff --git a/docs/changelog/110179.yaml b/docs/changelog/110179.yaml
new file mode 100644
index 0000000000000..b99a390c8586f
--- /dev/null
+++ b/docs/changelog/110179.yaml
@@ -0,0 +1,6 @@
+pr: 110179
+summary: Make repository analysis API available to non-operators
+area: Snapshot/Restore
+type: enhancement
+issues:
+ - 100318
diff --git a/docs/changelog/110186.yaml b/docs/changelog/110186.yaml
new file mode 100644
index 0000000000000..23eaab118e2ab
--- /dev/null
+++ b/docs/changelog/110186.yaml
@@ -0,0 +1,6 @@
+pr: 110186
+summary: Don't sample calls to `ReduceContext#consumeBucketsAndMaybeBreak` in `InternalDateHistogram`
+ and `InternalHistogram` during reduction
+area: Aggregations
+type: bug
+issues: []
diff --git a/docs/changelog/110201.yaml b/docs/changelog/110201.yaml
new file mode 100644
index 0000000000000..a880638881948
--- /dev/null
+++ b/docs/changelog/110201.yaml
@@ -0,0 +1,6 @@
+pr: 110201
+summary: "ES|QL: Fix DISSECT that overwrites input"
+area: ES|QL
+type: bug
+issues:
+ - 110184
diff --git a/docs/changelog/110214.yaml b/docs/changelog/110214.yaml
new file mode 100644
index 0000000000000..20f61cac64454
--- /dev/null
+++ b/docs/changelog/110214.yaml
@@ -0,0 +1,5 @@
+pr: 110214
+summary: Handle `ignore_above` in synthetic source for flattened fields
+area: Mapping
+type: enhancement
+issues: []
diff --git a/docs/changelog/110233.yaml b/docs/changelog/110233.yaml
new file mode 100644
index 0000000000000..d9ce4057090a4
--- /dev/null
+++ b/docs/changelog/110233.yaml
@@ -0,0 +1,6 @@
+pr: 110233
+summary: Support k parameter for knn query
+area: Vector Search
+type: enhancement
+issues:
+ - 108473
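Editor's note: a hedged sketch of the new parameter on the `knn` query; the names and vectors are illustrative. `k` is intended to bound how many neighbors the query collects (previously tied to the request `size`), while `num_candidates` still controls how many candidates are examined per shard:

```
GET my-index/_search
{
  "query": {
    "knn": {
      "field": "vector",
      "query_vector": [0.1, 0.2, 0.3],
      "k": 10,
      "num_candidates": 100
    }
  }
}
```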
diff --git a/docs/changelog/110234.yaml b/docs/changelog/110234.yaml
new file mode 100644
index 0000000000000..0656ba5fb6636
--- /dev/null
+++ b/docs/changelog/110234.yaml
@@ -0,0 +1,5 @@
+pr: 110234
+summary: Upgrade to Lucene-9.11.1
+area: Search
+type: upgrade
+issues: []
diff --git a/docs/changelog/110236.yaml b/docs/changelog/110236.yaml
new file mode 100644
index 0000000000000..e2dbff7fbf768
--- /dev/null
+++ b/docs/changelog/110236.yaml
@@ -0,0 +1,21 @@
+pr: 110236
+summary: '`ParseHeapRatioOrDeprecatedByteSizeValue` for `indices.breaker.total.limit`'
+area: Infra/Settings
+type: deprecation
+issues: []
+deprecation:
+ title: 'Deprecate absolute size values for `indices.breaker.total.limit` setting'
+ area: Cluster and node setting
+ details: Previously, the value of `indices.breaker.total.limit` could be specified as
+ an absolute size in bytes. This setting controls the overall amount of
+ memory the server is allowed to use before taking remedial actions. Setting
+ this to a specific number of bytes led to strange behaviour when the node
+ maximum heap size changed because the circuit breaker limit would remain
+ unchanged. This would either leave the value too low, causing part of the
+ heap to remain unused; or it would leave the value too high, causing the
+ circuit breaker to be ineffective at preventing OOM errors. The only
+ reasonable behaviour for this setting is that it scales with the size of
+ the heap, and so absolute byte limits are now deprecated.
+ impact: Users must change their configuration to specify a percentage instead of
+ an absolute number of bytes for `indices.breaker.total.limit`, or else
+ accept the default, which is already specified as a percentage.
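Editor's note: the migration this deprecation asks for is a one-line settings change; a sketch using the dynamic cluster settings API (the 70% figure is only an example):

```
PUT /_cluster/settings
{
  "persistent": {
    "indices.breaker.total.limit": "70%"
  }
}
```

Because the value is a percentage, the effective byte limit now scales automatically when the node's heap size changes.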
diff --git a/docs/changelog/110251.yaml b/docs/changelog/110251.yaml
new file mode 100644
index 0000000000000..a3b0c3128be35
--- /dev/null
+++ b/docs/changelog/110251.yaml
@@ -0,0 +1,13 @@
+pr: 110251
+summary: Support index sorting with nested fields
+area: Logs
+type: enhancement
+issues:
+ - 107349
+highlight:
+ title: Index sorting on indexes with nested fields
+ body: |-
+ Index sorting is now supported for indexes with mappings containing nested objects.
+ The index sort spec (as specified by `index.sort.field`) still can't contain any
+ nested fields.
+ notable: false
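Editor's note: a minimal sketch of what is now accepted, an index sort on a regular field alongside a `nested` mapping (all names hypothetical). As the highlight above states, the sort field itself still can't be nested:

```
PUT my-sorted-index
{
  "settings": {
    "index.sort.field": "host.name",
    "index.sort.order": "asc"
  },
  "mappings": {
    "properties": {
      "host.name": { "type": "keyword" },
      "events": {
        "type": "nested",
        "properties": {
          "code": { "type": "keyword" }
        }
      }
    }
  }
}
```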
diff --git a/docs/changelog/110334.yaml b/docs/changelog/110334.yaml
new file mode 100644
index 0000000000000..f83ac04ded773
--- /dev/null
+++ b/docs/changelog/110334.yaml
@@ -0,0 +1,5 @@
+pr: 110334
+summary: Sentence Chunker
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/changelog/110337.yaml b/docs/changelog/110337.yaml
new file mode 100644
index 0000000000000..bf21a95c9157f
--- /dev/null
+++ b/docs/changelog/110337.yaml
@@ -0,0 +1,5 @@
+pr: 110337
+summary: Support `ignore_above` on keyword dimensions
+area: TSDB
+type: enhancement
+issues: []
diff --git a/docs/changelog/110338.yaml b/docs/changelog/110338.yaml
new file mode 100644
index 0000000000000..2334a1cbc9283
--- /dev/null
+++ b/docs/changelog/110338.yaml
@@ -0,0 +1,5 @@
+pr: 110338
+summary: Add `semantic_text` field type and `semantic` query
+area: Mapping
+type: feature
+issues: []
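Editor's note: a hedged sketch of the new field type and query; the index name and inference endpoint are hypothetical, and the endpoint must already exist:

```
PUT my-semantic-index
{
  "mappings": {
    "properties": {
      "content": {
        "type": "semantic_text",
        "inference_id": "my-elser-endpoint"
      }
    }
  }
}

GET my-semantic-index/_search
{
  "query": {
    "semantic": {
      "field": "content",
      "query": "best surfing spots"
    }
  }
}
```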
diff --git a/docs/changelog/110347.yaml b/docs/changelog/110347.yaml
new file mode 100644
index 0000000000000..8727128230935
--- /dev/null
+++ b/docs/changelog/110347.yaml
@@ -0,0 +1,5 @@
+pr: 110347
+summary: "ESQL: Renamed `TopList` to Top"
+area: ES|QL
+type: enhancement
+issues: []
diff --git a/docs/changelog/110352.yaml b/docs/changelog/110352.yaml
new file mode 100644
index 0000000000000..7dad1ce5f6dd4
--- /dev/null
+++ b/docs/changelog/110352.yaml
@@ -0,0 +1,5 @@
+pr: 110352
+summary: Search coordinator uses `event.ingested` in cluster state to do rewrites
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/changelog/110361.yaml b/docs/changelog/110361.yaml
new file mode 100644
index 0000000000000..8558c88e06049
--- /dev/null
+++ b/docs/changelog/110361.yaml
@@ -0,0 +1,7 @@
+pr: 110361
+summary: Don't detect `PlainActionFuture` deadlock on concurrent complete
+area: Distributed
+type: bug
+issues:
+ - 110181
+ - 110360
diff --git a/docs/changelog/110369.yaml b/docs/changelog/110369.yaml
new file mode 100644
index 0000000000000..770294605b444
--- /dev/null
+++ b/docs/changelog/110369.yaml
@@ -0,0 +1,6 @@
+pr: 110369
+summary: Run terms concurrently only when cardinality is lower than shard size
+area: Aggregations
+type: bug
+issues:
+ - 105505
diff --git a/docs/changelog/110383.yaml b/docs/changelog/110383.yaml
new file mode 100644
index 0000000000000..5e9bddd4bfcd2
--- /dev/null
+++ b/docs/changelog/110383.yaml
@@ -0,0 +1,5 @@
+pr: 110383
+summary: Add bulk delete roles API
+area: Security
+type: enhancement
+issues: []
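Editor's note: a sketch of the new bulk form, assuming it mirrors the single-role `DELETE /_security/role/<name>` endpoint but accepts a `names` array in the request body; the role names are illustrative:

```
DELETE /_security/role
{
  "names": [ "my_admin_role", "my_user_role" ]
}
```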
diff --git a/docs/changelog/110391.yaml b/docs/changelog/110391.yaml
new file mode 100644
index 0000000000000..1e00eda970398
--- /dev/null
+++ b/docs/changelog/110391.yaml
@@ -0,0 +1,6 @@
+pr: 110391
+summary: Fix ST_DISTANCE Lucene push-down for complex predicates
+area: ES|QL
+type: bug
+issues:
+ - 110349
diff --git a/docs/changelog/110395.yaml b/docs/changelog/110395.yaml
new file mode 100644
index 0000000000000..690be55abb5b2
--- /dev/null
+++ b/docs/changelog/110395.yaml
@@ -0,0 +1,9 @@
+pr: 110395
+summary: Mark the Redact processor as Generally Available
+area: Ingest Node
+type: feature
+issues: []
+highlight:
+ title: The Redact processor is Generally Available
+ body: The Redact processor uses the Grok rules engine to obscure text in the input document matching the given Grok patterns. The Redact processor was initially released as Technical Preview in `8.7.0`, and is now released as Generally Available.
+ notable: true
diff --git a/docs/changelog/110399.yaml b/docs/changelog/110399.yaml
new file mode 100644
index 0000000000000..9e04e2656809e
--- /dev/null
+++ b/docs/changelog/110399.yaml
@@ -0,0 +1,6 @@
+pr: 110399
+summary: "[Inference API] Prevent inference endpoints from being deleted if they are\
+ \ referenced by semantic text"
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/changelog/110427.yaml b/docs/changelog/110427.yaml
new file mode 100644
index 0000000000000..ba8a1246e90e4
--- /dev/null
+++ b/docs/changelog/110427.yaml
@@ -0,0 +1,6 @@
+pr: 110427
+summary: "[Inference API] Remove unused Cohere rerank service settings fields in a\
+ \ BWC way"
+area: Machine Learning
+type: bug
+issues: []
diff --git a/docs/changelog/110431.yaml b/docs/changelog/110431.yaml
new file mode 100644
index 0000000000000..0dd93ef718ef9
--- /dev/null
+++ b/docs/changelog/110431.yaml
@@ -0,0 +1,5 @@
+pr: 110431
+summary: "[Inference API] Fix serialization for inference delete endpoint response"
+area: Machine Learning
+type: bug
+issues: []
diff --git a/docs/changelog/110476.yaml b/docs/changelog/110476.yaml
new file mode 100644
index 0000000000000..bc12b3711a366
--- /dev/null
+++ b/docs/changelog/110476.yaml
@@ -0,0 +1,7 @@
+pr: 110476
+summary: Fix bug in union-types with type-casting in grouping key of STATS
+area: ES|QL
+type: bug
+issues:
+ - 109922
+ - 110477
diff --git a/docs/changelog/110520.yaml b/docs/changelog/110520.yaml
new file mode 100644
index 0000000000000..fba4b84e2279e
--- /dev/null
+++ b/docs/changelog/110520.yaml
@@ -0,0 +1,5 @@
+pr: 110520
+summary: Add protection for OOM during aggregations partial reduction
+area: Aggregations
+type: enhancement
+issues: []
diff --git a/docs/changelog/110527.yaml b/docs/changelog/110527.yaml
new file mode 100644
index 0000000000000..3ab19ecaaaa76
--- /dev/null
+++ b/docs/changelog/110527.yaml
@@ -0,0 +1,5 @@
+pr: 110527
+summary: "ESQL: Add boolean support to Max and Min aggs"
+area: ES|QL
+type: feature
+issues: []
diff --git a/docs/changelog/110540.yaml b/docs/changelog/110540.yaml
new file mode 100644
index 0000000000000..5e4994da80704
--- /dev/null
+++ b/docs/changelog/110540.yaml
@@ -0,0 +1,16 @@
+pr: 110540
+summary: Deprecate using SLM privileges to access ILM
+area: ILM+SLM
+type: deprecation
+issues: []
+deprecation:
+ title: Deprecate using SLM privileges to access ILM
+ area: REST API
+ details: The `read_slm` privilege can get the ILM status, and
+ the `manage_slm` privilege can start and stop ILM. Access to these
+ APIs should be granted using the `read_ilm` and `manage_ilm` privileges
+ instead. Access to ILM APIs will be removed from SLM privileges in
+ a future major release, and is now deprecated.
+ impact: Users that need access to the ILM status API should now
+ use the `read_ilm` privilege. Users that need to start and stop ILM
+ should use the `manage_ilm` privilege.
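Editor's note: a sketch of a role that follows the guidance above, granting ILM access explicitly rather than relying on SLM privileges; the role name is illustrative:

```
PUT /_security/role/ilm_operator
{
  "cluster": [ "manage_ilm", "read_slm" ]
}
```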
diff --git a/docs/changelog/110554.yaml b/docs/changelog/110554.yaml
new file mode 100644
index 0000000000000..8c0b896a4c979
--- /dev/null
+++ b/docs/changelog/110554.yaml
@@ -0,0 +1,5 @@
+pr: 110554
+summary: Fix `MapperBuilderContext#isDataStream` when used in dynamic mappers
+area: "Mapping"
+type: bug
+issues: []
diff --git a/docs/changelog/110574.yaml b/docs/changelog/110574.yaml
new file mode 100644
index 0000000000000..1840838500151
--- /dev/null
+++ b/docs/changelog/110574.yaml
@@ -0,0 +1,6 @@
+pr: 110574
+summary: "ES|QL: better validation for GROK patterns"
+area: ES|QL
+type: bug
+issues:
+ - 110533
diff --git a/docs/changelog/110578.yaml b/docs/changelog/110578.yaml
new file mode 100644
index 0000000000000..5d48171e4f328
--- /dev/null
+++ b/docs/changelog/110578.yaml
@@ -0,0 +1,5 @@
+pr: 110578
+summary: Add `size_in_bytes` to enrich cache stats
+area: Ingest Node
+type: enhancement
+issues: []
diff --git a/docs/changelog/110586.yaml b/docs/changelog/110586.yaml
new file mode 100644
index 0000000000000..cc2bcb85a2dac
--- /dev/null
+++ b/docs/changelog/110586.yaml
@@ -0,0 +1,5 @@
+pr: 110586
+summary: "ESQL: Fix Max doubles bug with negatives and add tests for Max and Min"
+area: ES|QL
+type: bug
+issues: []
diff --git a/docs/changelog/110603.yaml b/docs/changelog/110603.yaml
new file mode 100644
index 0000000000000..4ba19985853df
--- /dev/null
+++ b/docs/changelog/110603.yaml
@@ -0,0 +1,6 @@
+pr: 110603
+summary: Stop iterating over all fields to extract @timestamp value
+area: TSDB
+type: enhancement
+issues:
+ - 92297
diff --git a/docs/changelog/110651.yaml b/docs/changelog/110651.yaml
new file mode 100644
index 0000000000000..c25c63ee0284a
--- /dev/null
+++ b/docs/changelog/110651.yaml
@@ -0,0 +1,5 @@
+pr: 110651
+summary: "Remove `default_field: message` from metrics index templates"
+area: Data streams
+type: enhancement
+issues: []
diff --git a/docs/changelog/110665.yaml b/docs/changelog/110665.yaml
new file mode 100644
index 0000000000000..fa6db3190fe60
--- /dev/null
+++ b/docs/changelog/110665.yaml
@@ -0,0 +1,6 @@
+pr: 110665
+summary: "[ESQL] Fix parsing of large magnitude negative numbers"
+area: ES|QL
+type: bug
+issues:
+ - 104323
diff --git a/docs/changelog/110666.yaml b/docs/changelog/110666.yaml
new file mode 100644
index 0000000000000..d96f8e2024c81
--- /dev/null
+++ b/docs/changelog/110666.yaml
@@ -0,0 +1,5 @@
+pr: 110666
+summary: Remove the use of `Stream::peek` from `GeoIpDownloader::cleanDatabases`
+area: Ingest Node
+type: bug
+issues: []
diff --git a/docs/changelog/110676.yaml b/docs/changelog/110676.yaml
new file mode 100644
index 0000000000000..efe7e0e55f18f
--- /dev/null
+++ b/docs/changelog/110676.yaml
@@ -0,0 +1,5 @@
+pr: 110676
+summary: Allow querying `index_mode`
+area: Mapping
+type: enhancement
+issues: []
diff --git a/docs/changelog/110707.yaml b/docs/changelog/110707.yaml
new file mode 100644
index 0000000000000..e13688c73c743
--- /dev/null
+++ b/docs/changelog/110707.yaml
@@ -0,0 +1,5 @@
+pr: 110707
+summary: Fix issue with returning incomplete fragment for plain highlighter
+area: Highlighting
+type: bug
+issues: []
diff --git a/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc
index 5afed11923a2a..f88cad3296282 100644
--- a/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc
@@ -78,7 +78,7 @@ following output:
"start_offset" : 24,
"end_offset" : 28,
"type" : "word",
- "position" : 4
+ "position" : 5
}
]
}
diff --git a/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc b/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc
index 217b29451937d..730dad852adee 100644
--- a/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc
+++ b/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc
@@ -31,7 +31,7 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec
(Optional, integer) The offset from the first result to fetch. Defaults to `0`.
`status`::
-(Optional, job status) A comma-separated list of job statuses to filter the results. Available statuses include: `canceling`, `canceled`, `completed`, `error`, `in_progress`, `pending`, `suspended`.
+(Optional, job status) A job status to filter the results for. Available statuses include: `canceling`, `canceled`, `completed`, `error`, `in_progress`, `pending`, `suspended`.
`connector_id`::
(Optional, string) The id of the connector to filter the fetched sync jobs by.
diff --git a/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc b/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc
index 4dd9cc6e67ab2..1427269d22b86 100644
--- a/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc
+++ b/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc
@@ -53,6 +53,9 @@ This API is mainly used by the connector service for updating sync job informati
`last_seen`::
(Optional, instant) The timestamp to set the connector sync job's `last_seen` property.
+`metadata`::
+(Optional, object) The connector-specific metadata.
+
[[set-connector-sync-job-stats-api-response-codes]]
==== {api-response-codes-title}
diff --git a/docs/reference/connector/apis/update-connector-error-api.asciidoc b/docs/reference/connector/apis/update-connector-error-api.asciidoc
index 67ea6b6d17cf0..c6ac0c9a1ac22 100644
--- a/docs/reference/connector/apis/update-connector-error-api.asciidoc
+++ b/docs/reference/connector/apis/update-connector-error-api.asciidoc
@@ -21,6 +21,11 @@ To get started with Connector APIs, check out the {enterprise-search-ref}/connec
* To sync data using self-managed connectors, you need to deploy the {enterprise-search-ref}/build-connector.html[Elastic connector service] on your own infrastructure. This service runs automatically on Elastic Cloud for native connectors.
* The `connector_id` parameter should reference an existing connector.
+[[update-connector-error-api-desc]]
+==== {api-description-title}
+
+Sets the `error` field for the specified connector. If the `error` provided in the request body is non-null, the connector's status is updated to `error`. Otherwise, if the `error` is reset to null, the connector status is updated to `connected`.
+
[[update-connector-error-api-path-params]]
==== {api-path-parms-title}
diff --git a/docs/reference/data-streams/downsampling.asciidoc b/docs/reference/data-streams/downsampling.asciidoc
index b005e83e8c95d..0b08b0972f9a1 100644
--- a/docs/reference/data-streams/downsampling.asciidoc
+++ b/docs/reference/data-streams/downsampling.asciidoc
@@ -18,9 +18,9 @@ Metrics solutions collect large amounts of time series data that grow over time.
As that data ages, it becomes less relevant to the current state of the system.
The downsampling process rolls up documents within a fixed time interval into a
single summary document. Each summary document includes statistical
-representations of the original data: the `min`, `max`, `sum`, `value_count`,
-and `average` for each metric. Data stream <> are stored unchanged.
+representations of the original data: the `min`, `max`, `sum` and `value_count`
+for each metric. Data stream <>
+are stored unchanged.
Downsampling, in effect, lets you trade data resolution and precision for
storage size. You can include it in an <>, and as a result is
-subject to a number of <>.
+subject to some <> and <> applied to the `_source` field.
NOTE: A time series index can contain fields other than dimensions or metrics.
@@ -109,19 +110,6 @@ parameter:
For a flattened field, use the `time_series_dimensions` parameter to configure an array of fields as dimensions. For details refer to <>.
-[[dimension-limits]]
-.Dimension limits
-****
-In a TSDS, {es} uses dimensions to
-generate the document `_id` and <> values. The resulting `_id` is
-always a short encoded hash. To prevent the `_tsid` value from being overly
-large, {es} limits the number of dimensions for an index using the
-<>
-index setting. While you can increase this limit, the resulting document `_tsid`
-value can't exceed 32KB. Additionally the field name of a dimension cannot be
-longer than 512 bytes and the each dimension value can't exceed 1kb.
-****
-
[discrete]
[[time-series-metric]]
==== Metrics
@@ -290,11 +278,6 @@ created the initial backing index has:
Only data that falls inside that range can be indexed.
-In our <>,
-`index.look_ahead_time` is set to three hours, so only documents with a
-`@timestamp` value that is within three hours previous or subsequent to the
-present time are accepted for indexing.
-
You can use the <> to check the
accepted time range for writing to any TSDS.
diff --git a/docs/reference/docs/index_.asciidoc b/docs/reference/docs/index_.asciidoc
index 9d359fd7d7f02..ccc8e67f39bc0 100644
--- a/docs/reference/docs/index_.asciidoc
+++ b/docs/reference/docs/index_.asciidoc
@@ -211,7 +211,7 @@ creates a dynamic mapping. By default, new fields and objects are
automatically added to the mapping if needed. For more information about field
mapping, see <> and the <> API.
-Automatic index creation is controlled by the `action.auto_create_index`
+Automatic index creation is controlled by the <>
setting. This setting defaults to `true`, which allows any index to be created
automatically. You can modify this setting to explicitly allow or block
automatic creation of indices that match specified patterns, or set it to
diff --git a/docs/reference/esql/esql-query-api.asciidoc b/docs/reference/esql/esql-query-api.asciidoc
index 2cdd97ceab176..e8cfa03e3ee88 100644
--- a/docs/reference/esql/esql-query-api.asciidoc
+++ b/docs/reference/esql/esql-query-api.asciidoc
@@ -75,6 +75,11 @@ For syntax, refer to <>.
(Optional, array) Values for parameters in the `query`. For syntax, refer to
<>.
+`profile`::
+(Optional, boolean) If provided and `true`, the response will include an extra `profile` object
+with information about how the query was executed. It provides insight into the performance
+of each part of the query. This is for human debugging, as the object's format might change at any time.
+
`query`::
(Required, string) {esql} query to run. For syntax, refer to <>.
@@ -100,3 +105,8 @@ returned if `drop_null_columns` is sent with the request.
`rows`::
(array of arrays)
Values for the search results.
+
+`profile`::
+(object)
+Profile describing the execution of the query. Only returned if `profile` was sent in the body.
+The object itself is for human debugging and can change at any time.
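Editor's note: a minimal request sketch for the parameter documented above; the `library` index is illustrative:

```
POST /_query
{
  "query": "FROM library | STATS COUNT(*) BY author | LIMIT 3",
  "profile": true
}
```

The response then carries an extra top-level `profile` object alongside the columns and rows described above; as noted, its shape is unstable and meant only for human inspection.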
diff --git a/docs/reference/esql/esql-rest.asciidoc b/docs/reference/esql/esql-rest.asciidoc
index de2b6dedd8776..5b90e96d7a734 100644
--- a/docs/reference/esql/esql-rest.asciidoc
+++ b/docs/reference/esql/esql-rest.asciidoc
@@ -111,6 +111,9 @@ s|Description
|{wikipedia}/Smile_(data_interchange_format)[Smile] binary data format similar
to CBOR
+|arrow
+|application/vnd.apache.arrow.stream
+|**Experimental.** https://arrow.apache.org/[Apache Arrow] dataframes, https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format[IPC streaming format]
|===
The `csv` format accepts a formatting URL query attribute, `delimiter`, which
diff --git a/docs/reference/esql/functions/aggregation-functions.asciidoc b/docs/reference/esql/functions/aggregation-functions.asciidoc
index 074fcce9ad43d..82931b84fd44a 100644
--- a/docs/reference/esql/functions/aggregation-functions.asciidoc
+++ b/docs/reference/esql/functions/aggregation-functions.asciidoc
@@ -8,27 +8,31 @@
The <> command supports these aggregate functions:
// tag::agg_list[]
-* <>
+* <>
* <>
* <>
-* <>
+* <>
* <>
* <>
-* <>
+* <>
* <>
* experimental:[] <>
* <>
+* <>
* <>
+* experimental:[] <>
// end::agg_list[]
-include::avg.asciidoc[]
include::count.asciidoc[]
include::count-distinct.asciidoc[]
-include::max.asciidoc[]
include::median.asciidoc[]
include::median-absolute-deviation.asciidoc[]
-include::min.asciidoc[]
include::percentile.asciidoc[]
include::st_centroid_agg.asciidoc[]
include::sum.asciidoc[]
+include::layout/avg.asciidoc[]
+include::layout/max.asciidoc[]
+include::layout/min.asciidoc[]
+include::layout/top.asciidoc[]
include::values.asciidoc[]
+include::weighted-avg.asciidoc[]
diff --git a/docs/reference/esql/functions/avg.asciidoc b/docs/reference/esql/functions/avg.asciidoc
deleted file mode 100644
index 7eadff29f1bfc..0000000000000
--- a/docs/reference/esql/functions/avg.asciidoc
+++ /dev/null
@@ -1,47 +0,0 @@
-[discrete]
-[[esql-agg-avg]]
-=== `AVG`
-
-*Syntax*
-
-[source,esql]
-----
-AVG(expression)
-----
-
-`expression`::
-Numeric expression.
-//If `null`, the function returns `null`.
-// TODO: Remove comment when https://github.com/elastic/elasticsearch/issues/104900 is fixed.
-
-*Description*
-
-The average of a numeric expression.
-
-*Supported types*
-
-The result is always a `double` no matter the input type.
-
-*Examples*
-
-[source.merge.styled,esql]
-----
-include::{esql-specs}/stats.csv-spec[tag=avg]
-----
-[%header.monospaced.styled,format=dsv,separator=|]
-|===
-include::{esql-specs}/stats.csv-spec[tag=avg-result]
-|===
-
-The expression can use inline functions. For example, to calculate the average
-over a multivalued column, first use `MV_AVG` to average the multiple values per
-row, and use the result with the `AVG` function:
-
-[source.merge.styled,esql]
-----
-include::{esql-specs}/stats.csv-spec[tag=docsStatsAvgNestedExpression]
-----
-[%header.monospaced.styled,format=dsv,separator=|]
-|===
-include::{esql-specs}/stats.csv-spec[tag=docsStatsAvgNestedExpression-result]
-|===
diff --git a/docs/reference/esql/functions/binary.asciidoc b/docs/reference/esql/functions/binary.asciidoc
index 959bbe11c040e..72d466ae83d11 100644
--- a/docs/reference/esql/functions/binary.asciidoc
+++ b/docs/reference/esql/functions/binary.asciidoc
@@ -7,6 +7,12 @@
[.text-center]
image::esql/functions/signature/equals.svg[Embedded,opts=inline]
+Check if two fields are equal. If either field is <> then
+the result is `null`.
+
+NOTE: This is pushed to the underlying search index if one side of the comparison is constant
+ and the other side is a field in the index that has both an <> and <>.
+
Supported types:
include::types/equals.asciidoc[]
@@ -15,6 +21,12 @@ include::types/equals.asciidoc[]
[.text-center]
image::esql/functions/signature/not_equals.svg[Embedded,opts=inline]
+Check if two fields are unequal. If either field is <> then
+the result is `null`.
+
+NOTE: This is pushed to the underlying search index if one side of the comparison is constant
+ and the other side is a field in the index that has both an <> and <>.
+
Supported types:
include::types/not_equals.asciidoc[]
@@ -23,55 +35,112 @@ include::types/not_equals.asciidoc[]
[.text-center]
image::esql/functions/signature/less_than.svg[Embedded,opts=inline]
+Check if one field is less than another. If either field is <>
+then the result is `null`.
+
+NOTE: This is pushed to the underlying search index if one side of the comparison is constant
+ and the other side is a field in the index that has both an <> and <>.
+
+Supported types:
+
include::types/less_than.asciidoc[]
==== Less than or equal to `<=`
[.text-center]
image::esql/functions/signature/less_than_or_equal.svg[Embedded,opts=inline]
+Check if one field is less than or equal to another. If either field is <>
+then the result is `null`.
+
+NOTE: This is pushed to the underlying search index if one side of the comparison is constant
+ and the other side is a field in the index that has both an <> and <>.
+
+Supported types:
+
include::types/less_than_or_equal.asciidoc[]
==== Greater than `>`
[.text-center]
image::esql/functions/signature/greater_than.svg[Embedded,opts=inline]
+Check if one field is greater than another. If either field is <>
+then the result is `null`.
+
+NOTE: This is pushed to the underlying search index if one side of the comparison is constant
+ and the other side is a field in the index that has both an <> and <>.
+
+Supported types:
+
include::types/greater_than.asciidoc[]
==== Greater than or equal to `>=`
[.text-center]
image::esql/functions/signature/greater_than_or_equal.svg[Embedded,opts=inline]
+Check if one field is greater than or equal to another. If either field is <>
+then the result is `null`.
+
+NOTE: This is pushed to the underlying search index if one side of the comparison is constant
+ and the other side is a field in the index that has both an <> and <>.
+
+Supported types:
+
include::types/greater_than_or_equal.asciidoc[]
==== Add `+`
[.text-center]
image::esql/functions/signature/add.svg[Embedded,opts=inline]
+Add two numbers together. If either field is <>
+then the result is `null`.
+
+Supported types:
+
include::types/add.asciidoc[]
==== Subtract `-`
[.text-center]
image::esql/functions/signature/sub.svg[Embedded,opts=inline]
+Subtract one number from another. If either field is <>
+then the result is `null`.
+
+Supported types:
+
include::types/sub.asciidoc[]
==== Multiply `*`
[.text-center]
image::esql/functions/signature/mul.svg[Embedded,opts=inline]
+Multiply two numbers together. If either field is <>
+then the result is `null`.
+
+Supported types:
+
include::types/mul.asciidoc[]
==== Divide `/`
[.text-center]
image::esql/functions/signature/div.svg[Embedded,opts=inline]
+Divide one number by another. If either field is <>
+then the result is `null`.
+
NOTE: Division of two integer types will yield an integer result, rounding towards 0.
If you need floating point division, <> one of the arguments to a `DOUBLE`.
+Supported types:
+
include::types/div.asciidoc[]
==== Modulus `%`
[.text-center]
image::esql/functions/signature/mod.svg[Embedded,opts=inline]
+Divide one number by another and return the remainder. If either field is <>
+then the result is `null`.
+
+Supported types:
+
include::types/mod.asciidoc[]
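Editor's note: the integer-division note above is easy to trip over; a quick sketch of the cast workaround, applying `TO_DOUBLE` to one argument:

```
POST /_query
{
  "query": "ROW a = 5, b = 2 | EVAL int_div = a / b, double_div = TO_DOUBLE(a) / b"
}
```

Here `int_div` evaluates to `2` (rounded towards 0) while `double_div` evaluates to `2.5`.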
diff --git a/docs/reference/esql/functions/description/avg.asciidoc b/docs/reference/esql/functions/description/avg.asciidoc
new file mode 100644
index 0000000000000..545d7e8394e8b
--- /dev/null
+++ b/docs/reference/esql/functions/description/avg.asciidoc
@@ -0,0 +1,5 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Description*
+
+The average of a numeric field.
diff --git a/docs/reference/esql/functions/description/locate.asciidoc b/docs/reference/esql/functions/description/locate.asciidoc
index 60a6d435e37b6..e5a6fba512432 100644
--- a/docs/reference/esql/functions/description/locate.asciidoc
+++ b/docs/reference/esql/functions/description/locate.asciidoc
@@ -2,4 +2,4 @@
*Description*
-Returns an integer that indicates the position of a keyword substring within another string
+Returns an integer that indicates the position of a keyword substring within another string.
diff --git a/docs/reference/esql/functions/description/max.asciidoc b/docs/reference/esql/functions/description/max.asciidoc
new file mode 100644
index 0000000000000..27a76ed69c3c0
--- /dev/null
+++ b/docs/reference/esql/functions/description/max.asciidoc
@@ -0,0 +1,5 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Description*
+
+The maximum value of a field.
diff --git a/docs/reference/esql/functions/description/min.asciidoc b/docs/reference/esql/functions/description/min.asciidoc
new file mode 100644
index 0000000000000..406125b5761d1
--- /dev/null
+++ b/docs/reference/esql/functions/description/min.asciidoc
@@ -0,0 +1,5 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Description*
+
+The minimum value of a field.
diff --git a/docs/reference/esql/functions/description/st_distance.asciidoc b/docs/reference/esql/functions/description/st_distance.asciidoc
new file mode 100644
index 0000000000000..b27fcef0eb4f7
--- /dev/null
+++ b/docs/reference/esql/functions/description/st_distance.asciidoc
@@ -0,0 +1,5 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Description*
+
+Computes the distance between two points. For Cartesian geometries, this is the Pythagorean distance in the same units as the original coordinates. For geographic geometries, this is the circular distance along the great circle in meters.
diff --git a/docs/reference/esql/functions/description/substring.asciidoc b/docs/reference/esql/functions/description/substring.asciidoc
index edb97b219bbe0..3d8091f26c04d 100644
--- a/docs/reference/esql/functions/description/substring.asciidoc
+++ b/docs/reference/esql/functions/description/substring.asciidoc
@@ -2,4 +2,4 @@
*Description*
-Returns a substring of a string, specified by a start position and an optional length
+Returns a substring of a string, specified by a start position and an optional length.
diff --git a/docs/reference/esql/functions/description/top.asciidoc b/docs/reference/esql/functions/description/top.asciidoc
new file mode 100644
index 0000000000000..39b31e17aec55
--- /dev/null
+++ b/docs/reference/esql/functions/description/top.asciidoc
@@ -0,0 +1,5 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Description*
+
+Collects the top values for a field. Includes repeated values.
diff --git a/docs/reference/esql/functions/examples/avg.asciidoc b/docs/reference/esql/functions/examples/avg.asciidoc
new file mode 100644
index 0000000000000..b6193ad50ed21
--- /dev/null
+++ b/docs/reference/esql/functions/examples/avg.asciidoc
@@ -0,0 +1,22 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Examples*
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/stats.csv-spec[tag=avg]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/stats.csv-spec[tag=avg-result]
+|===
+The expression can use inline functions. For example, to calculate the average over a multivalued column, first use `MV_AVG` to average the multiple values per row, and use the result with the `AVG` function:
+[source.merge.styled,esql]
+----
+include::{esql-specs}/stats.csv-spec[tag=docsStatsAvgNestedExpression]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/stats.csv-spec[tag=docsStatsAvgNestedExpression-result]
+|===
+
diff --git a/docs/reference/esql/functions/max.asciidoc b/docs/reference/esql/functions/examples/max.asciidoc
similarity index 55%
rename from docs/reference/esql/functions/max.asciidoc
rename to docs/reference/esql/functions/examples/max.asciidoc
index f2e0d0a0205b3..dc57118931ef7 100644
--- a/docs/reference/esql/functions/max.asciidoc
+++ b/docs/reference/esql/functions/examples/max.asciidoc
@@ -1,24 +1,6 @@
-[discrete]
-[[esql-agg-max]]
-=== `MAX`
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
-*Syntax*
-
-[source,esql]
-----
-MAX(expression)
-----
-
-*Parameters*
-
-`expression`::
-Expression from which to return the maximum value.
-
-*Description*
-
-Returns the maximum value of a numeric expression.
-
-*Example*
+*Examples*
[source.merge.styled,esql]
----
@@ -28,11 +10,7 @@ include::{esql-specs}/stats.csv-spec[tag=max]
|===
include::{esql-specs}/stats.csv-spec[tag=max-result]
|===
-
-The expression can use inline functions. For example, to calculate the maximum
-over an average of a multivalued column, use `MV_AVG` to first average the
-multiple values per row, and use the result with the `MAX` function:
-
+The expression can use inline functions. For example, to calculate the maximum over an average of a multivalued column, use `MV_AVG` to first average the multiple values per row, and use the result with the `MAX` function:
[source.merge.styled,esql]
----
include::{esql-specs}/stats.csv-spec[tag=docsStatsMaxNestedExpression]
@@ -40,4 +18,5 @@ include::{esql-specs}/stats.csv-spec[tag=docsStatsMaxNestedExpression]
[%header.monospaced.styled,format=dsv,separator=|]
|===
include::{esql-specs}/stats.csv-spec[tag=docsStatsMaxNestedExpression-result]
-|===
\ No newline at end of file
+|===
+
diff --git a/docs/reference/esql/functions/min.asciidoc b/docs/reference/esql/functions/examples/min.asciidoc
similarity index 55%
rename from docs/reference/esql/functions/min.asciidoc
rename to docs/reference/esql/functions/examples/min.asciidoc
index 313822818128c..b4088196d750b 100644
--- a/docs/reference/esql/functions/min.asciidoc
+++ b/docs/reference/esql/functions/examples/min.asciidoc
@@ -1,24 +1,6 @@
-[discrete]
-[[esql-agg-min]]
-=== `MIN`
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
-*Syntax*
-
-[source,esql]
-----
-MIN(expression)
-----
-
-*Parameters*
-
-`expression`::
-Expression from which to return the minimum value.
-
-*Description*
-
-Returns the minimum value of a numeric expression.
-
-*Example*
+*Examples*
[source.merge.styled,esql]
----
@@ -28,11 +10,7 @@ include::{esql-specs}/stats.csv-spec[tag=min]
|===
include::{esql-specs}/stats.csv-spec[tag=min-result]
|===
-
-The expression can use inline functions. For example, to calculate the minimum
-over an average of a multivalued column, use `MV_AVG` to first average the
-multiple values per row, and use the result with the `MIN` function:
-
+The expression can use inline functions. For example, to calculate the minimum over an average of a multivalued column, use `MV_AVG` to first average the multiple values per row, and use the result with the `MIN` function:
[source.merge.styled,esql]
----
include::{esql-specs}/stats.csv-spec[tag=docsStatsMinNestedExpression]
@@ -41,3 +19,4 @@ include::{esql-specs}/stats.csv-spec[tag=docsStatsMinNestedExpression]
|===
include::{esql-specs}/stats.csv-spec[tag=docsStatsMinNestedExpression-result]
|===
+
diff --git a/docs/reference/esql/functions/examples/st_distance.asciidoc b/docs/reference/esql/functions/examples/st_distance.asciidoc
new file mode 100644
index 0000000000000..60da852eff736
--- /dev/null
+++ b/docs/reference/esql/functions/examples/st_distance.asciidoc
@@ -0,0 +1,13 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Example*
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/spatial.csv-spec[tag=st_distance-airports]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/spatial.csv-spec[tag=st_distance-airports-result]
+|===
+
diff --git a/docs/reference/esql/functions/examples/top.asciidoc b/docs/reference/esql/functions/examples/top.asciidoc
new file mode 100644
index 0000000000000..3d48d3c346c9e
--- /dev/null
+++ b/docs/reference/esql/functions/examples/top.asciidoc
@@ -0,0 +1,13 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Example*
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/stats_top.csv-spec[tag=top]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/stats_top.csv-spec[tag=top-result]
+|===
+
diff --git a/docs/reference/esql/functions/ip-functions.asciidoc b/docs/reference/esql/functions/ip-functions.asciidoc
index 55c808e587a18..0d58e24c02945 100644
--- a/docs/reference/esql/functions/ip-functions.asciidoc
+++ b/docs/reference/esql/functions/ip-functions.asciidoc
@@ -9,6 +9,8 @@
// tag::ip_list[]
* <>
+* <>
// end::ip_list[]
include::layout/cidr_match.asciidoc[]
+include::layout/ip_prefix.asciidoc[]
diff --git a/docs/reference/esql/functions/kibana/definition/avg.json b/docs/reference/esql/functions/kibana/definition/avg.json
new file mode 100644
index 0000000000000..eb0be684a468e
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/definition/avg.json
@@ -0,0 +1,48 @@
+{
+ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
+ "type" : "agg",
+ "name" : "avg",
+ "description" : "The average of a numeric field.",
+ "signatures" : [
+ {
+ "params" : [
+ {
+ "name" : "number",
+ "type" : "double",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ },
+ {
+ "params" : [
+ {
+ "name" : "number",
+ "type" : "integer",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ },
+ {
+ "params" : [
+ {
+ "name" : "number",
+ "type" : "long",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ }
+ ],
+ "examples" : [
+ "FROM employees\n| STATS AVG(height)",
+ "FROM employees\n| STATS avg_salary_change = ROUND(AVG(MV_AVG(salary_change)), 10)"
+ ]
+}
diff --git a/docs/reference/esql/functions/kibana/definition/locate.json b/docs/reference/esql/functions/kibana/definition/locate.json
index 13b7512e17def..2097c90b41958 100644
--- a/docs/reference/esql/functions/kibana/definition/locate.json
+++ b/docs/reference/esql/functions/kibana/definition/locate.json
@@ -2,7 +2,7 @@
"comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
"type" : "eval",
"name" : "locate",
- "description" : "Returns an integer that indicates the position of a keyword substring within another string",
+ "description" : "Returns an integer that indicates the position of a keyword substring within another string.",
"signatures" : [
{
"params" : [
diff --git a/docs/reference/esql/functions/kibana/definition/max.json b/docs/reference/esql/functions/kibana/definition/max.json
new file mode 100644
index 0000000000000..bc7380bd76dd4
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/definition/max.json
@@ -0,0 +1,72 @@
+{
+ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
+ "type" : "agg",
+ "name" : "max",
+ "description" : "The maximum value of a field.",
+ "signatures" : [
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "boolean",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "boolean"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "datetime",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "datetime"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "double",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "integer",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "integer"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "long",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "long"
+ }
+ ],
+ "examples" : [
+ "FROM employees\n| STATS MAX(languages)",
+ "FROM employees\n| STATS max_avg_salary_change = MAX(MV_AVG(salary_change))"
+ ]
+}
diff --git a/docs/reference/esql/functions/kibana/definition/min.json b/docs/reference/esql/functions/kibana/definition/min.json
new file mode 100644
index 0000000000000..937391bf242ac
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/definition/min.json
@@ -0,0 +1,72 @@
+{
+ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
+ "type" : "agg",
+ "name" : "min",
+ "description" : "The minimum value of a field.",
+ "signatures" : [
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "boolean",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "boolean"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "datetime",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "datetime"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "double",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "integer",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "integer"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "long",
+ "optional" : false,
+ "description" : ""
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "long"
+ }
+ ],
+ "examples" : [
+ "FROM employees\n| STATS MIN(languages)",
+ "FROM employees\n| STATS min_avg_salary_change = MIN(MV_AVG(salary_change))"
+ ]
+}
diff --git a/docs/reference/esql/functions/kibana/definition/st_distance.json b/docs/reference/esql/functions/kibana/definition/st_distance.json
new file mode 100644
index 0000000000000..448e0d54051da
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/definition/st_distance.json
@@ -0,0 +1,47 @@
+{
+ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
+ "type" : "eval",
+ "name" : "st_distance",
+ "description" : "Computes the distance between two points.\nFor cartesian geometries, this is the pythagorean distance in the same units as the original coordinates.\nFor geographic geometries, this is the circular distance along the great circle in meters.",
+ "signatures" : [
+ {
+ "params" : [
+ {
+ "name" : "geomA",
+ "type" : "cartesian_point",
+ "optional" : false,
+ "description" : "Expression of type `geo_point` or `cartesian_point`. If `null`, the function returns `null`."
+ },
+ {
+ "name" : "geomB",
+ "type" : "cartesian_point",
+ "optional" : false,
+ "description" : "Expression of type `geo_point` or `cartesian_point`. If `null`, the function returns `null`. The second parameter must also have the same coordinate system as the first. This means it is not possible to combine `geo_point` and `cartesian_point` parameters."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ },
+ {
+ "params" : [
+ {
+ "name" : "geomA",
+ "type" : "geo_point",
+ "optional" : false,
+ "description" : "Expression of type `geo_point` or `cartesian_point`. If `null`, the function returns `null`."
+ },
+ {
+ "name" : "geomB",
+ "type" : "geo_point",
+ "optional" : false,
+ "description" : "Expression of type `geo_point` or `cartesian_point`. If `null`, the function returns `null`. The second parameter must also have the same coordinate system as the first. This means it is not possible to combine `geo_point` and `cartesian_point` parameters."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ }
+ ],
+ "examples" : [
+ "FROM airports\n| WHERE abbrev == \"CPH\"\n| EVAL distance = ST_DISTANCE(location, city_location)\n| KEEP abbrev, name, location, city_location, distance"
+ ]
+}
diff --git a/docs/reference/esql/functions/kibana/definition/substring.json b/docs/reference/esql/functions/kibana/definition/substring.json
index 25f432796cc8d..b38b545822a90 100644
--- a/docs/reference/esql/functions/kibana/definition/substring.json
+++ b/docs/reference/esql/functions/kibana/definition/substring.json
@@ -2,7 +2,7 @@
"comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
"type" : "eval",
"name" : "substring",
- "description" : "Returns a substring of a string, specified by a start position and an optional length",
+ "description" : "Returns a substring of a string, specified by a start position and an optional length.",
"signatures" : [
{
"params" : [
diff --git a/docs/reference/esql/functions/kibana/definition/top.json b/docs/reference/esql/functions/kibana/definition/top.json
new file mode 100644
index 0000000000000..7ad073d6e7564
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/definition/top.json
@@ -0,0 +1,107 @@
+{
+ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
+ "type" : "agg",
+ "name" : "top",
+ "description" : "Collects the top values for a field. Includes repeated values.",
+ "signatures" : [
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "datetime",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "datetime"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "double",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "integer"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "long",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "long"
+ }
+ ],
+ "examples" : [
+ "FROM employees\n| STATS top_salaries = TOP(salary, 3, \"desc\"), top_salary = MAX(salary)"
+ ]
+}
diff --git a/docs/reference/esql/functions/kibana/docs/avg.md b/docs/reference/esql/functions/kibana/docs/avg.md
new file mode 100644
index 0000000000000..54006a0556175
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/docs/avg.md
@@ -0,0 +1,11 @@
+<!--
+This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+-->
+
+### AVG
+The average of a numeric field.
+
+```
+FROM employees
+| STATS AVG(height)
+```
diff --git a/docs/reference/esql/functions/kibana/docs/locate.md b/docs/reference/esql/functions/kibana/docs/locate.md
index 7fffbfd548f20..75275068d3096 100644
--- a/docs/reference/esql/functions/kibana/docs/locate.md
+++ b/docs/reference/esql/functions/kibana/docs/locate.md
@@ -3,7 +3,7 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ
-->
### LOCATE
-Returns an integer that indicates the position of a keyword substring within another string
+Returns an integer that indicates the position of a keyword substring within another string.
```
row a = "hello"
diff --git a/docs/reference/esql/functions/kibana/docs/max.md b/docs/reference/esql/functions/kibana/docs/max.md
new file mode 100644
index 0000000000000..80e88885e7f34
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/docs/max.md
@@ -0,0 +1,11 @@
+<!--
+This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+-->
+
+### MAX
+The maximum value of a field.
+
+```
+FROM employees
+| STATS MAX(languages)
+```
diff --git a/docs/reference/esql/functions/kibana/docs/min.md b/docs/reference/esql/functions/kibana/docs/min.md
new file mode 100644
index 0000000000000..38d13b97fd344
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/docs/min.md
@@ -0,0 +1,11 @@
+
+
+### MIN
+The minimum value of a field.
+
+```
+FROM employees
+| STATS MIN(languages)
+```
diff --git a/docs/reference/esql/functions/kibana/docs/st_distance.md b/docs/reference/esql/functions/kibana/docs/st_distance.md
new file mode 100644
index 0000000000000..7ea2d5a255357
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/docs/st_distance.md
@@ -0,0 +1,15 @@
+
+
+### ST_DISTANCE
+Computes the distance between two points.
+For cartesian geometries, this is the Pythagorean distance in the same units as the original coordinates.
+For geographic geometries, this is the circular distance along the great circle in meters.
+
+```
+FROM airports
+| WHERE abbrev == "CPH"
+| EVAL distance = ST_DISTANCE(location, city_location)
+| KEEP abbrev, name, location, city_location, distance
+```
diff --git a/docs/reference/esql/functions/kibana/docs/substring.md b/docs/reference/esql/functions/kibana/docs/substring.md
index 62c4eb33c2e95..5f2601a279f6f 100644
--- a/docs/reference/esql/functions/kibana/docs/substring.md
+++ b/docs/reference/esql/functions/kibana/docs/substring.md
@@ -3,7 +3,7 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ
-->
### SUBSTRING
-Returns a substring of a string, specified by a start position and an optional length
+Returns a substring of a string, specified by a start position and an optional length.
```
FROM employees
diff --git a/docs/reference/esql/functions/kibana/docs/top.md b/docs/reference/esql/functions/kibana/docs/top.md
new file mode 100644
index 0000000000000..10db4e7ac5b55
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/docs/top.md
@@ -0,0 +1,11 @@
+
+
+### TOP
+Collects the top values for a field. Includes repeated values.
+
+```
+FROM employees
+| STATS top_salaries = TOP(salary, 3, "desc"), top_salary = MAX(salary)
+```
diff --git a/docs/reference/esql/functions/layout/avg.asciidoc b/docs/reference/esql/functions/layout/avg.asciidoc
new file mode 100644
index 0000000000000..8292af8e75554
--- /dev/null
+++ b/docs/reference/esql/functions/layout/avg.asciidoc
@@ -0,0 +1,15 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+[discrete]
+[[esql-avg]]
+=== `AVG`
+
+*Syntax*
+
+[.text-center]
+image::esql/functions/signature/avg.svg[Embedded,opts=inline]
+
+include::../parameters/avg.asciidoc[]
+include::../description/avg.asciidoc[]
+include::../types/avg.asciidoc[]
+include::../examples/avg.asciidoc[]
diff --git a/docs/reference/esql/functions/layout/max.asciidoc b/docs/reference/esql/functions/layout/max.asciidoc
new file mode 100644
index 0000000000000..a4eb3d99c0d02
--- /dev/null
+++ b/docs/reference/esql/functions/layout/max.asciidoc
@@ -0,0 +1,15 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+[discrete]
+[[esql-max]]
+=== `MAX`
+
+*Syntax*
+
+[.text-center]
+image::esql/functions/signature/max.svg[Embedded,opts=inline]
+
+include::../parameters/max.asciidoc[]
+include::../description/max.asciidoc[]
+include::../types/max.asciidoc[]
+include::../examples/max.asciidoc[]
diff --git a/docs/reference/esql/functions/layout/min.asciidoc b/docs/reference/esql/functions/layout/min.asciidoc
new file mode 100644
index 0000000000000..60ad2cc21b561
--- /dev/null
+++ b/docs/reference/esql/functions/layout/min.asciidoc
@@ -0,0 +1,15 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+[discrete]
+[[esql-min]]
+=== `MIN`
+
+*Syntax*
+
+[.text-center]
+image::esql/functions/signature/min.svg[Embedded,opts=inline]
+
+include::../parameters/min.asciidoc[]
+include::../description/min.asciidoc[]
+include::../types/min.asciidoc[]
+include::../examples/min.asciidoc[]
diff --git a/docs/reference/esql/functions/layout/st_distance.asciidoc b/docs/reference/esql/functions/layout/st_distance.asciidoc
new file mode 100644
index 0000000000000..159b071ce63a7
--- /dev/null
+++ b/docs/reference/esql/functions/layout/st_distance.asciidoc
@@ -0,0 +1,15 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+[discrete]
+[[esql-st_distance]]
+=== `ST_DISTANCE`
+
+*Syntax*
+
+[.text-center]
+image::esql/functions/signature/st_distance.svg[Embedded,opts=inline]
+
+include::../parameters/st_distance.asciidoc[]
+include::../description/st_distance.asciidoc[]
+include::../types/st_distance.asciidoc[]
+include::../examples/st_distance.asciidoc[]
diff --git a/docs/reference/esql/functions/layout/top.asciidoc b/docs/reference/esql/functions/layout/top.asciidoc
new file mode 100644
index 0000000000000..a29a7c96a3697
--- /dev/null
+++ b/docs/reference/esql/functions/layout/top.asciidoc
@@ -0,0 +1,15 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+[discrete]
+[[esql-top]]
+=== `TOP`
+
+*Syntax*
+
+[.text-center]
+image::esql/functions/signature/top.svg[Embedded,opts=inline]
+
+include::../parameters/top.asciidoc[]
+include::../description/top.asciidoc[]
+include::../types/top.asciidoc[]
+include::../examples/top.asciidoc[]
diff --git a/docs/reference/esql/functions/parameters/avg.asciidoc b/docs/reference/esql/functions/parameters/avg.asciidoc
new file mode 100644
index 0000000000000..91c56709d182a
--- /dev/null
+++ b/docs/reference/esql/functions/parameters/avg.asciidoc
@@ -0,0 +1,6 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Parameters*
+
+`number`::
+
diff --git a/docs/reference/esql/functions/parameters/max.asciidoc b/docs/reference/esql/functions/parameters/max.asciidoc
new file mode 100644
index 0000000000000..8903aa1a472a3
--- /dev/null
+++ b/docs/reference/esql/functions/parameters/max.asciidoc
@@ -0,0 +1,6 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Parameters*
+
+`field`::
+
diff --git a/docs/reference/esql/functions/parameters/min.asciidoc b/docs/reference/esql/functions/parameters/min.asciidoc
new file mode 100644
index 0000000000000..8903aa1a472a3
--- /dev/null
+++ b/docs/reference/esql/functions/parameters/min.asciidoc
@@ -0,0 +1,6 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Parameters*
+
+`field`::
+
diff --git a/docs/reference/esql/functions/parameters/st_distance.asciidoc b/docs/reference/esql/functions/parameters/st_distance.asciidoc
new file mode 100644
index 0000000000000..f32433dfbf6fb
--- /dev/null
+++ b/docs/reference/esql/functions/parameters/st_distance.asciidoc
@@ -0,0 +1,9 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Parameters*
+
+`geomA`::
+Expression of type `geo_point` or `cartesian_point`. If `null`, the function returns `null`.
+
+`geomB`::
+Expression of type `geo_point` or `cartesian_point`. If `null`, the function returns `null`. The second parameter must also have the same coordinate system as the first. This means it is not possible to combine `geo_point` and `cartesian_point` parameters.
diff --git a/docs/reference/esql/functions/parameters/top.asciidoc b/docs/reference/esql/functions/parameters/top.asciidoc
new file mode 100644
index 0000000000000..979bca393b5aa
--- /dev/null
+++ b/docs/reference/esql/functions/parameters/top.asciidoc
@@ -0,0 +1,12 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Parameters*
+
+`field`::
+The field to collect the top values for.
+
+`limit`::
+The maximum number of values to collect.
+
+`order`::
+The order to calculate the top values. Either `asc` or `desc`.
diff --git a/docs/reference/esql/functions/signature/avg.svg b/docs/reference/esql/functions/signature/avg.svg
new file mode 100644
index 0000000000000..f325358aff960
--- /dev/null
+++ b/docs/reference/esql/functions/signature/avg.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/reference/esql/functions/signature/max.svg b/docs/reference/esql/functions/signature/max.svg
new file mode 100644
index 0000000000000..dda43dfbfbba2
--- /dev/null
+++ b/docs/reference/esql/functions/signature/max.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/reference/esql/functions/signature/min.svg b/docs/reference/esql/functions/signature/min.svg
new file mode 100644
index 0000000000000..e654d3027fee8
--- /dev/null
+++ b/docs/reference/esql/functions/signature/min.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/reference/esql/functions/signature/st_distance.svg b/docs/reference/esql/functions/signature/st_distance.svg
new file mode 100644
index 0000000000000..1831a139a719f
--- /dev/null
+++ b/docs/reference/esql/functions/signature/st_distance.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/reference/esql/functions/signature/top.svg b/docs/reference/esql/functions/signature/top.svg
new file mode 100644
index 0000000000000..cfd15e0d94ac4
--- /dev/null
+++ b/docs/reference/esql/functions/signature/top.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/reference/esql/functions/spatial-functions.asciidoc b/docs/reference/esql/functions/spatial-functions.asciidoc
index d143681fcf2f2..79acc2028d983 100644
--- a/docs/reference/esql/functions/spatial-functions.asciidoc
+++ b/docs/reference/esql/functions/spatial-functions.asciidoc
@@ -14,6 +14,7 @@
* experimental:[] <>
* experimental:[] <>
* experimental:[] <>
+* experimental:[] <>
// end::spatial_list[]
include::layout/st_intersects.asciidoc[]
@@ -22,3 +23,4 @@ include::layout/st_contains.asciidoc[]
include::layout/st_within.asciidoc[]
include::layout/st_x.asciidoc[]
include::layout/st_y.asciidoc[]
+include::layout/st_distance.asciidoc[]
diff --git a/docs/reference/esql/functions/types/avg.asciidoc b/docs/reference/esql/functions/types/avg.asciidoc
new file mode 100644
index 0000000000000..273dae4af76c2
--- /dev/null
+++ b/docs/reference/esql/functions/types/avg.asciidoc
@@ -0,0 +1,11 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Supported types*
+
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+number | result
+double | double
+integer | double
+long | double
+|===
diff --git a/docs/reference/esql/functions/types/max.asciidoc b/docs/reference/esql/functions/types/max.asciidoc
new file mode 100644
index 0000000000000..6515c6bfc48d2
--- /dev/null
+++ b/docs/reference/esql/functions/types/max.asciidoc
@@ -0,0 +1,13 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Supported types*
+
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+field | result
+boolean | boolean
+datetime | datetime
+double | double
+integer | integer
+long | long
+|===
diff --git a/docs/reference/esql/functions/types/min.asciidoc b/docs/reference/esql/functions/types/min.asciidoc
new file mode 100644
index 0000000000000..6515c6bfc48d2
--- /dev/null
+++ b/docs/reference/esql/functions/types/min.asciidoc
@@ -0,0 +1,13 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Supported types*
+
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+field | result
+boolean | boolean
+datetime | datetime
+double | double
+integer | integer
+long | long
+|===
diff --git a/docs/reference/esql/functions/types/st_distance.asciidoc b/docs/reference/esql/functions/types/st_distance.asciidoc
new file mode 100644
index 0000000000000..c6ae485f3f535
--- /dev/null
+++ b/docs/reference/esql/functions/types/st_distance.asciidoc
@@ -0,0 +1,10 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Supported types*
+
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+geomA | geomB | result
+cartesian_point | cartesian_point | double
+geo_point | geo_point | double
+|===
diff --git a/docs/reference/esql/functions/types/top.asciidoc b/docs/reference/esql/functions/types/top.asciidoc
new file mode 100644
index 0000000000000..1874cd8b12bf3
--- /dev/null
+++ b/docs/reference/esql/functions/types/top.asciidoc
@@ -0,0 +1,12 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Supported types*
+
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+field | limit | order | result
+datetime | integer | keyword | datetime
+double | integer | keyword | double
+integer | integer | keyword | integer
+long | integer | keyword | long
+|===
diff --git a/docs/reference/esql/functions/weighted-avg.asciidoc b/docs/reference/esql/functions/weighted-avg.asciidoc
new file mode 100644
index 0000000000000..4f166801641df
--- /dev/null
+++ b/docs/reference/esql/functions/weighted-avg.asciidoc
@@ -0,0 +1,35 @@
+[discrete]
+[[esql-agg-weighted-avg]]
+=== `WEIGHTED_AVG`
+
+*Syntax*
+
+[source,esql]
+----
+WEIGHTED_AVG(expression, weight)
+----
+
+`expression`::
+Numeric expression.
+
+`weight`::
+Numeric weight.
+
+*Description*
+
+The weighted average of a numeric expression.
+
+*Supported types*
+
+The result is always a `double` no matter the input type.
+
+*Examples*
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/stats.csv-spec[tag=weighted-avg]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/stats.csv-spec[tag=weighted-avg-result]
+|===
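
The `WEIGHTED_AVG` example above is pulled in from a spec file that is not
visible in this diff. As a hedged sketch only (the `employees` sample index and
its `salary` and `height` fields are assumptions borrowed from nearby docs), a
query using the function could look like:

[source,esql]
----
// compute a height-weighted average salary (illustrative field names)
FROM employees
| STATS w_avg = WEIGHTED_AVG(salary, height)
----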
diff --git a/docs/reference/esql/processing-commands/dissect.asciidoc b/docs/reference/esql/processing-commands/dissect.asciidoc
index c48b72af0de7e..72c811a318a5d 100644
--- a/docs/reference/esql/processing-commands/dissect.asciidoc
+++ b/docs/reference/esql/processing-commands/dissect.asciidoc
@@ -2,6 +2,9 @@
[[esql-dissect]]
=== `DISSECT`
+`DISSECT` enables you to <>.
+
**Syntax**
[source,esql]
@@ -56,4 +59,4 @@ include::{esql-specs}/docs.csv-spec[tag=dissectWithToDatetime]
include::{esql-specs}/docs.csv-spec[tag=dissectWithToDatetime-result]
|===
-// end::examples[]
\ No newline at end of file
+// end::examples[]
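
Because the `DISSECT` examples are pulled in via includes, a minimal hedged
sketch of the pattern-matching behavior follows (the input string and the
`date`, `msg`, and `ip` keys are illustrative assumptions):

[source,esql]
----
// split one string column into three new columns using a dissect pattern
ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
| DISSECT a "%{date} - %{msg} - %{ip}"
----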
diff --git a/docs/reference/esql/processing-commands/drop.asciidoc b/docs/reference/esql/processing-commands/drop.asciidoc
index 8f03141d5e05a..c81f438f81c3b 100644
--- a/docs/reference/esql/processing-commands/drop.asciidoc
+++ b/docs/reference/esql/processing-commands/drop.asciidoc
@@ -2,6 +2,8 @@
[[esql-drop]]
=== `DROP`
+The `DROP` processing command removes one or more columns.
+
**Syntax**
[source,esql]
@@ -14,10 +16,6 @@ DROP columns
`columns`::
A comma-separated list of columns to remove. Supports wildcards.
-*Description*
-
-The `DROP` processing command removes one or more columns.
-
*Examples*
[source,esql]
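----
// hedged sketch of a DROP example (index and field names assumed);
// the wildcard removes every column whose name starts with "height"
FROM employees
| DROP height*
----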
diff --git a/docs/reference/esql/processing-commands/enrich.asciidoc b/docs/reference/esql/processing-commands/enrich.asciidoc
index 5470d81b2f40b..f34e77dbf5c23 100644
--- a/docs/reference/esql/processing-commands/enrich.asciidoc
+++ b/docs/reference/esql/processing-commands/enrich.asciidoc
@@ -2,6 +2,9 @@
[[esql-enrich]]
=== `ENRICH`
+`ENRICH` enables you to add data from existing indices as new columns using an
+enrich policy.
+
**Syntax**
[source,esql]
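----
// hedged sketch of an ENRICH example; the "languages_policy" enrich policy
// is an assumption for illustration only
ROW language_code = "1"
| ENRICH languages_policy
----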
diff --git a/docs/reference/esql/processing-commands/eval.asciidoc b/docs/reference/esql/processing-commands/eval.asciidoc
index 9b34fca7ceeff..f77249736c1b3 100644
--- a/docs/reference/esql/processing-commands/eval.asciidoc
+++ b/docs/reference/esql/processing-commands/eval.asciidoc
@@ -2,6 +2,9 @@
[[esql-eval]]
=== `EVAL`
+The `EVAL` processing command enables you to append new columns with calculated
+values.
+
**Syntax**
[source,esql]
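----
// hedged sketch of an EVAL example (index and field names assumed);
// appends a calculated column converting meters to feet
FROM employees
| KEEP first_name, last_name, height
| EVAL height_feet = height * 3.281
----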
diff --git a/docs/reference/esql/processing-commands/grok.asciidoc b/docs/reference/esql/processing-commands/grok.asciidoc
index d5d58a9eaee12..d631d17f7a42c 100644
--- a/docs/reference/esql/processing-commands/grok.asciidoc
+++ b/docs/reference/esql/processing-commands/grok.asciidoc
@@ -2,6 +2,9 @@
[[esql-grok]]
=== `GROK`
+`GROK` enables you to <>.
+
**Syntax**
[source,esql]
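----
// hedged sketch of a GROK example (input string is an assumption);
// standard grok patterns extract a timestamp and an IP address
ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1"
| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip}"
----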
diff --git a/docs/reference/esql/processing-commands/keep.asciidoc b/docs/reference/esql/processing-commands/keep.asciidoc
index 57f32a68aec4c..468f459411640 100644
--- a/docs/reference/esql/processing-commands/keep.asciidoc
+++ b/docs/reference/esql/processing-commands/keep.asciidoc
@@ -2,6 +2,9 @@
[[esql-keep]]
=== `KEEP`
+The `KEEP` processing command enables you to specify what columns are returned
+and the order in which they are returned.
+
**Syntax**
[source,esql]
@@ -70,7 +73,7 @@ include::{esql-specs}/docs.csv-spec[tag=keepDoubleWildcard]
include::{esql-specs}/docs.csv-spec[tag=keep-double-wildcard-result]
|===
-The following examples show how precedence rules work when a field name matches multiple expressions.
+The following examples show how precedence rules work when a field name matches multiple expressions.
Complete field name has precedence over wildcard expressions:
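
A hedged sketch of this behavior (field names assumed): `first_name` matches
both the literal column name and the `first_name*` wildcard, and the complete
name wins.

[source,esql]
----
FROM employees
| KEEP first_name, last_name, first_name*
----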
diff --git a/docs/reference/esql/processing-commands/limit.asciidoc b/docs/reference/esql/processing-commands/limit.asciidoc
index 4ccf3024a4c1e..78d05672ea095 100644
--- a/docs/reference/esql/processing-commands/limit.asciidoc
+++ b/docs/reference/esql/processing-commands/limit.asciidoc
@@ -2,6 +2,9 @@
[[esql-limit]]
=== `LIMIT`
+The `LIMIT` processing command enables you to limit the number of rows that are
+returned.
+
**Syntax**
[source,esql]
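----
// hedged sketch of a LIMIT example (index and field names assumed)
FROM employees
| SORT emp_no ASC
| LIMIT 5
----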
diff --git a/docs/reference/esql/processing-commands/lookup.asciidoc b/docs/reference/esql/processing-commands/lookup.asciidoc
index 1944d243968a8..426527bf4d2d6 100644
--- a/docs/reference/esql/processing-commands/lookup.asciidoc
+++ b/docs/reference/esql/processing-commands/lookup.asciidoc
@@ -4,6 +4,9 @@
experimental::["LOOKUP is a highly experimental and only available in SNAPSHOT versions."]
+`LOOKUP` matches values from the input against a `table` provided in the request,
+adding the other fields from the `table` to the output.
+
**Syntax**
[source,esql]
@@ -19,11 +22,6 @@ The name of the `table` provided in the request to match.
`match_field`::
The fields in the input to match against the table.
-*Description*
-
-`LOOKUP` matches values from the input against a `table` provided in the request,
-adding the other fields from the `table` to the output.
-
*Examples*
// tag::examples[]
diff --git a/docs/reference/esql/processing-commands/mv_expand.asciidoc b/docs/reference/esql/processing-commands/mv_expand.asciidoc
index 9e1cb5573c381..010701f7fc8ee 100644
--- a/docs/reference/esql/processing-commands/mv_expand.asciidoc
+++ b/docs/reference/esql/processing-commands/mv_expand.asciidoc
@@ -4,6 +4,9 @@
preview::[]
+The `MV_EXPAND` processing command expands multivalued columns into one row per
+value, duplicating other columns.
+
**Syntax**
[source,esql]
@@ -16,11 +19,6 @@ MV_EXPAND column
`column`::
The multivalued column to expand.
-*Description*
-
-The `MV_EXPAND` processing command expands multivalued columns into one row per
-value, duplicating other columns.
-
*Example*
[source.merge.styled,esql]
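----
// hedged sketch of the MV_EXPAND example body (values illustrative);
// the multivalued column "a" expands to one row per value
ROW a = [1, 2, 3], b = "b"
| MV_EXPAND a
----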
diff --git a/docs/reference/esql/processing-commands/rename.asciidoc b/docs/reference/esql/processing-commands/rename.asciidoc
index 773fe8b640f75..8507a826f085d 100644
--- a/docs/reference/esql/processing-commands/rename.asciidoc
+++ b/docs/reference/esql/processing-commands/rename.asciidoc
@@ -2,6 +2,8 @@
[[esql-rename]]
=== `RENAME`
+The `RENAME` processing command renames one or more columns.
+
**Syntax**
[source,esql]
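----
// hedged sketch of a RENAME example (field names assumed)
FROM employees
| KEEP first_name, last_name
| RENAME first_name AS fn, last_name AS ln
----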
diff --git a/docs/reference/esql/processing-commands/sort.asciidoc b/docs/reference/esql/processing-commands/sort.asciidoc
index fea7bfaf0c65f..e76b9c76ab273 100644
--- a/docs/reference/esql/processing-commands/sort.asciidoc
+++ b/docs/reference/esql/processing-commands/sort.asciidoc
@@ -2,6 +2,8 @@
[[esql-sort]]
=== `SORT`
+The `SORT` processing command sorts a table on one or more columns.
+
**Syntax**
[source,esql]
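----
// hedged sketch of a SORT example (index and field names assumed)
FROM employees
| KEEP first_name, last_name, height
| SORT height DESC
----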
diff --git a/docs/reference/esql/processing-commands/stats.asciidoc b/docs/reference/esql/processing-commands/stats.asciidoc
index fe84c56bbfc19..34ae81fd5414e 100644
--- a/docs/reference/esql/processing-commands/stats.asciidoc
+++ b/docs/reference/esql/processing-commands/stats.asciidoc
@@ -2,11 +2,14 @@
[[esql-stats-by]]
=== `STATS ... BY`
+The `STATS ... BY` processing command groups rows according to a common value
+and calculates one or more aggregated values over the grouped rows.
+
**Syntax**
[source,esql]
----
-STATS [column1 =] expression1[, ..., [columnN =] expressionN]
+STATS [column1 =] expression1[, ..., [columnN =] expressionN]
[BY grouping_expression1[, ..., grouping_expressionN]]
----
@@ -39,8 +42,8 @@ NOTE: `STATS` without any groups is much much faster than adding a group.
NOTE: Grouping on a single expression is currently much more optimized than grouping
on many expressions. In some tests we have seen grouping on a single `keyword`
- column to be five times faster than grouping on two `keyword` columns. Do
- not try to work around this by combining the two columns together with
+ column to be five times faster than grouping on two `keyword` columns. Do
+ not try to work around this by combining the two columns together with
something like <> and then grouping - that is not going to be
faster.
@@ -80,14 +83,36 @@ include::{esql-specs}/stats.csv-spec[tag=statsCalcMultipleValues]
include::{esql-specs}/stats.csv-spec[tag=statsCalcMultipleValues-result]
|===
-It's also possible to group by multiple values (only supported for long and
-keyword family fields):
+[[esql-stats-mv-group]]
+If the grouping key is multivalued then the input row is in all groups:
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/stats.csv-spec[tag=mv-group]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/stats.csv-spec[tag=mv-group-result]
+|===
+
+It's also possible to group by multiple values:
[source,esql]
----
include::{esql-specs}/stats.csv-spec[tag=statsGroupByMultipleValues]
----
+If all the grouping keys are multivalued then the input row is in all groups:
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/stats.csv-spec[tag=multi-mv-group]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/stats.csv-spec[tag=multi-mv-group-result]
+|===
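
Since the included spec rows are not visible in this diff, here is a concrete
hedged sketch of the multivalue-grouping behavior (values illustrative): a row
whose grouping key contains both "a" and "b" contributes to both groups.

[source,esql]
----
ROW b = ["a", "b"], i = 1
| STATS MIN(i) BY b
----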
+
Both the aggregating functions and the grouping expressions accept other
functions. This is useful for using `STATS...BY` on multivalue columns.
For example, to calculate the average salary change, you can use `MV_AVG` to
diff --git a/docs/reference/esql/processing-commands/where.asciidoc b/docs/reference/esql/processing-commands/where.asciidoc
index 3076f92c40fc0..407df30c57215 100644
--- a/docs/reference/esql/processing-commands/where.asciidoc
+++ b/docs/reference/esql/processing-commands/where.asciidoc
@@ -2,6 +2,9 @@
[[esql-where]]
=== `WHERE`
+The `WHERE` processing command produces a table that contains all the rows from
+the input table for which the provided condition evaluates to `true`.
+
**Syntax**
[source,esql]
@@ -14,11 +17,6 @@ WHERE expression
`expression`::
A boolean expression.
-*Description*
-
-The `WHERE` processing command produces a table that contains all the rows from
-the input table for which the provided condition evaluates to `true`.
-
*Examples*
[source,esql]
@@ -33,7 +31,7 @@ Which, if `still_hired` is a boolean field, can be simplified to:
include::{esql-specs}/docs.csv-spec[tag=whereBoolean]
----
-Use date math to retrieve data from a specific time range. For example, to
+Use date math to retrieve data from a specific time range. For example, to
retrieve the last hour of logs:
[source,esql]
@@ -59,4 +57,4 @@ include::../functions/rlike.asciidoc[tag=body]
include::../functions/in.asciidoc[tag=body]
-For a complete list of all operators, refer to <>.
\ No newline at end of file
+For a complete list of all operators, refer to <>.
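
As a closing illustrative sketch (the `employees` index and its `still_hired`
boolean field are assumptions from the surrounding docs):

[source,esql]
----
FROM employees
| KEEP first_name, last_name, still_hired
| WHERE still_hired == true
----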
diff --git a/docs/reference/esql/source-commands/from.asciidoc b/docs/reference/esql/source-commands/from.asciidoc
index d81c46530e089..6ed517efca7cf 100644
--- a/docs/reference/esql/source-commands/from.asciidoc
+++ b/docs/reference/esql/source-commands/from.asciidoc
@@ -2,6 +2,9 @@
[[esql-from]]
=== `FROM`
+The `FROM` source command returns a table with data from a data stream, index,
+or alias.
+
**Syntax**
[source,esql]
@@ -82,3 +85,11 @@ Use the optional `METADATA` directive to enable <>.
-*Description*
-
-The `ROW` source command produces a row with one or more columns with values
-that you specify. This can be useful for testing.
-
*Examples*
[source.merge.styled,esql]
diff --git a/docs/reference/esql/source-commands/show.asciidoc b/docs/reference/esql/source-commands/show.asciidoc
index 298ea5d8f92b9..7090ab790133f 100644
--- a/docs/reference/esql/source-commands/show.asciidoc
+++ b/docs/reference/esql/source-commands/show.asciidoc
@@ -2,6 +2,9 @@
[[esql-show]]
=== `SHOW`
+The `SHOW` source command returns information about the deployment and
+its capabilities.
+
**Syntax**
[source,esql]
@@ -14,15 +17,10 @@ SHOW item
`item`::
Can only be `INFO`.
-*Description*
-
-The `SHOW` source command returns information about the deployment and
-its capabilities:
-
-* Use `SHOW INFO` to return the deployment's version, build date and hash.
-
*Examples*
+Use `SHOW INFO` to return the deployment's version, build date and hash.
+
[source,esql]
----
SHOW INFO
diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc
index 40b4ff4bb9dc8..24149afe802a2 100644
--- a/docs/reference/index-modules.asciidoc
+++ b/docs/reference/index-modules.asciidoc
@@ -80,7 +80,10 @@ breaking change].
compression ratio, at the expense of slower stored fields performance.
If you are updating the compression type, the new one will be applied
after segments are merged. Segment merging can be forced using
- <>.
+ <>. Experiments with indexing log datasets
+ have shown that `best_compression` gives up to ~18% lower storage usage
+ compared to `default` in the best case, while only minimally affecting
+ indexing throughput (~2%).
[[index-mode-setting]] `index.mode`::
+
diff --git a/docs/reference/index-modules/index-sorting.asciidoc b/docs/reference/index-modules/index-sorting.asciidoc
index dd355eccbca2a..1334a96872459 100644
--- a/docs/reference/index-modules/index-sorting.asciidoc
+++ b/docs/reference/index-modules/index-sorting.asciidoc
@@ -6,9 +6,8 @@ inside each Shard will be sorted. By default Lucene does not apply any sort.
The `index.sort.*` settings define which fields should be used to sort the documents inside each Segment.
[WARNING]
-nested fields are not compatible with index sorting because they rely on the assumption
-that nested documents are stored in contiguous doc ids, which can be broken by index sorting.
-An error will be thrown if index sorting is activated on an index that contains nested fields.
+Index sorting can be applied to mappings with nested objects, so long as the
+`index.sort.*` settings contain no nested fields.
For instance the following example shows how to define a sort on a single field:
diff --git a/docs/reference/inference/delete-inference.asciidoc b/docs/reference/inference/delete-inference.asciidoc
index dca800c98ca2e..4df72ba672092 100644
--- a/docs/reference/inference/delete-inference.asciidoc
+++ b/docs/reference/inference/delete-inference.asciidoc
@@ -7,8 +7,8 @@ experimental[]
Deletes an {infer} endpoint.
IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in
-{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio or
-Hugging Face. For built-in models and models uploaded though Eland, the {infer}
+{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI or
+Hugging Face. For built-in models and models uploaded through Eland, the {infer}
APIs offer an alternative way to use and manage trained models. However, if you
do not plan to use the {infer} APIs to use these models or if you want to use
non-NLP models, use the <>.
@@ -50,11 +50,11 @@ The type of {infer} task that the model performs.
`dry_run`::
(Optional, Boolean)
When `true`, checks the {infer} processors that reference the endpoint and
-returns them in a list, but does not deletes the endpoint. Defaults to `false`.
+returns them in a list, but does not delete the endpoint. Defaults to `false`.
`force`::
(Optional, Boolean)
-Deletes the endpoint regardless if it's used in an {infer} pipeline or a in a
+Deletes the endpoint regardless of whether it's used in an {infer} pipeline or in a
`semantic_text` field.
diff --git a/docs/reference/inference/get-inference.asciidoc b/docs/reference/inference/get-inference.asciidoc
index 339146adfece9..c3fe841603bcc 100644
--- a/docs/reference/inference/get-inference.asciidoc
+++ b/docs/reference/inference/get-inference.asciidoc
@@ -7,8 +7,8 @@ experimental[]
Retrieves {infer} endpoint information.
IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in
-{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio or
-Hugging Face. For built-in models and models uploaded though Eland, the {infer}
+{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI or
+Hugging Face. For built-in models and models uploaded through Eland, the {infer}
APIs offer an alternative way to use and manage trained models. However, if you
do not plan to use the {infer} APIs to use these models or if you want to use
non-NLP models, use the <>.
@@ -65,7 +65,7 @@ The type of {infer} task that the model performs.
[[get-inference-api-example]]
==== {api-examples-title}
-The following API call retrives information about the `my-elser-model` {infer}
+The following API call retrieves information about the `my-elser-model` {infer}
model that can perform `sparse_embedding` tasks.
diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc
index 539bba3f0d61f..02a57504da1cf 100644
--- a/docs/reference/inference/inference-apis.asciidoc
+++ b/docs/reference/inference/inference-apis.asciidoc
@@ -6,7 +6,7 @@ experimental[]
IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in
{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio or
-Hugging Face. For built-in models and models uploaded though Eland, the {infer}
+Hugging Face. For built-in models and models uploaded through Eland, the {infer}
APIs offer an alternative way to use and manage trained models. However, if you
do not plan to use the {infer} APIs to use these models or if you want to use
non-NLP models, use the <>.
@@ -25,3 +25,13 @@ include::delete-inference.asciidoc[]
include::get-inference.asciidoc[]
include::post-inference.asciidoc[]
include::put-inference.asciidoc[]
+include::service-azure-ai-studio.asciidoc[]
+include::service-azure-openai.asciidoc[]
+include::service-cohere.asciidoc[]
+include::service-elasticsearch.asciidoc[]
+include::service-elser.asciidoc[]
+include::service-google-ai-studio.asciidoc[]
+include::service-google-vertex-ai.asciidoc[]
+include::service-hugging-face.asciidoc[]
+include::service-mistral.asciidoc[]
+include::service-openai.asciidoc[]
diff --git a/docs/reference/inference/inference-shared.asciidoc b/docs/reference/inference/inference-shared.asciidoc
new file mode 100644
index 0000000000000..2eafa3434e89e
--- /dev/null
+++ b/docs/reference/inference/inference-shared.asciidoc
@@ -0,0 +1,34 @@
+
+tag::api-key-admonition[]
+IMPORTANT: You need to provide the API key only once, during the {infer} model creation.
+The <> does not retrieve your API key.
+After creating the {infer} model, you cannot change the associated API key.
+If you want to use a different API key, delete the {infer} model and recreate it with the same name and the updated API key.
+end::api-key-admonition[]
+
+tag::inference-id[]
+The unique identifier of the {infer} endpoint.
+end::inference-id[]
+
+tag::request-per-minute-example[]
+[source,text]
+----
+"rate_limit": {
+ "requests_per_minute": <>
+}
+----
+end::request-per-minute-example[]
+
+
+tag::service-settings[]
+Settings used to install the {infer} model.
+end::service-settings[]
+
+tag::task-settings[]
+Settings to configure the {infer} task.
+These settings are specific to the `` you specified.
+end::task-settings[]
+
+tag::task-type[]
+The type of the {infer} task that the model will perform.
+end::task-type[]
\ No newline at end of file
diff --git a/docs/reference/inference/post-inference.asciidoc b/docs/reference/inference/post-inference.asciidoc
index 1414e45c07616..52131c0b10776 100644
--- a/docs/reference/inference/post-inference.asciidoc
+++ b/docs/reference/inference/post-inference.asciidoc
@@ -7,8 +7,8 @@ experimental[]
Performs an inference task on an input text by using an {infer} endpoint.
IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in
-{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio or
-Hugging Face. For built-in models and models uploaded though Eland, the {infer}
+{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI or
+Hugging Face. For built-in models and models uploaded through Eland, the {infer}
APIs offer an alternative way to use and manage trained models. However, if you
do not plan to use the {infer} APIs to use these models or if you want to use
non-NLP models, use the <>.
diff --git a/docs/reference/inference/put-inference.asciidoc b/docs/reference/inference/put-inference.asciidoc
index 22ec4fe8fa728..656feb54ffe42 100644
--- a/docs/reference/inference/put-inference.asciidoc
+++ b/docs/reference/inference/put-inference.asciidoc
@@ -7,10 +7,10 @@ experimental[]
Creates an {infer} endpoint to perform an {infer} task.
IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in
-{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio or Hugging Face.
-For built-in models and models uploaded though Eland, the {infer} APIs offer an alternative way to use and manage trained models.
-However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the
-<>.
+{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI or Hugging Face.
+For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models.
+However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>.
+
[discrete]
[[put-inference-api-request]]
@@ -25,851 +25,22 @@ However, if you do not plan to use the {infer} APIs to use these models or if yo
* Requires the `manage_inference` <>
(the built-in `inference_admin` role grants this privilege)
+
[discrete]
[[put-inference-api-desc]]
==== {api-description-title}
-The create {infer} API enables you to create an {infer} endpoint and configure a
-{ml} model to perform a specific {infer} task.
-
-The following services are available through the {infer} API:
-
-* Azure AI Studio
-* Azure OpenAI
-* Cohere
-* Elasticsearch (for built-in models and models uploaded through Eland)
-* ELSER
-* Google AI Studio
-* Hugging Face
-* Mistral
-* OpenAI
-
-[discrete]
-[[put-inference-api-path-params]]
-==== {api-path-parms-title}
-
-``::
-(Required, string)
-The unique identifier of the {infer} endpoint.
-
-``::
-(Required, string)
-The type of the {infer} task that the model will perform.
-Available task types:
-* `completion`,
-* `rerank`,
-* `sparse_embedding`,
-* `text_embedding`.
-
-[discrete]
-[[put-inference-api-request-body]]
-==== {api-request-body-title}
-
-`service`::
-(Required, string)
-The type of service supported for the specified task type.
-Available services:
-
-* `azureopenai`: specify the `completion` or `text_embedding` task type to use the Azure OpenAI service.
-* `azureaistudio`: specify the `completion` or `text_embedding` task type to use the Azure AI Studio service.
-* `cohere`: specify the `completion`, `text_embedding` or the `rerank` task type to use the Cohere service.
-* `elasticsearch`: specify the `text_embedding` task type to use the E5 built-in model or text embedding models uploaded by Eland.
-* `elser`: specify the `sparse_embedding` task type to use the ELSER service.
-* `googleaistudio`: specify the `completion` or `text_embeddig` task to use the Google AI Studio service.
-* `hugging_face`: specify the `text_embedding` task type to use the Hugging Face service.
-* `mistral`: specify the `text_embedding` task type to use the Mistral service.
-* `openai`: specify the `completion` or `text_embedding` task type to use the OpenAI service.
-
-
-`service_settings`::
-(Required, object)
-Settings used to install the {infer} model.
-These settings are specific to the
-`service` you specified.
-+
-.`service_settings` for the `azureaistudio` service
-[%collapsible%closed]
-=====
-
-`api_key`:::
-(Required, string)
-A valid API key of your Azure AI Studio model deployment.
-This key can be found on the overview page for your deployment in the management section of your https://ai.azure.com/[Azure AI Studio] account.
-
-IMPORTANT: You need to provide the API key only once, during the {infer} model creation.
-The <> does not retrieve your API key.
-After creating the {infer} model, you cannot change the associated API key.
-If you want to use a different API key, delete the {infer} model and recreate it with the same name and the updated API key.
-
-`target`:::
-(Required, string)
-The target URL of your Azure AI Studio model deployment.
-This can be found on the overview page for your deployment in the management section of your https://ai.azure.com/[Azure AI Studio] account.
-
-`provider`:::
-(Required, string)
-The model provider for your deployment.
-Note that some providers may support only certain task types.
-Supported providers include:
-
-* `cohere` - available for `text_embedding` and `completion` task types
-* `databricks` - available for `completion` task type only
-* `meta` - available for `completion` task type only
-* `microsoft_phi` - available for `completion` task type only
-* `mistral` - available for `completion` task type only
-* `openai` - available for `text_embedding` and `completion` task types
-
-`endpoint_type`:::
-(Required, string)
-One of `token` or `realtime`.
-Specifies the type of endpoint that is used in your model deployment.
-There are https://learn.microsoft.com/en-us/azure/ai-studio/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio[two endpoint types available] for deployment through Azure AI Studio.
-"Pay as you go" endpoints are billed per token.
-For these, you must specify `token` for your `endpoint_type`.
-For "real-time" endpoints which are billed per hour of usage, specify `realtime`.
-
-`rate_limit`:::
-(Optional, object)
-By default, the `azureaistudio` service sets the number of requests allowed per minute to `240`.
-This helps to minimize the number of rate limit errors returned from Azure AI Studio.
-To modify this, set the `requests_per_minute` setting of this object in your service settings:
-+
-[source,text]
-----
-"rate_limit": {
- "requests_per_minute": <>
-}
-----
-=====
-+
-.`service_settings` for the `azureopenai` service
-[%collapsible%closed]
-=====
-
-`api_key` or `entra_id`:::
-(Required, string)
-You must provide _either_ an API key or an Entra ID.
-If you do not provide either, or provide both, you will receive an error when trying to create your model.
-See the https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication[Azure OpenAI Authentication documentation] for more details on these authentication types.
-
-IMPORTANT: You need to provide the API key or Entra ID only once, during the {infer} model creation.
-The <> does not retrieve your authentication credentials.
-After creating the {infer} model, you cannot change the associated API key or Entra ID.
-If you want to use a different API key or Entra ID, delete the {infer} model and recreate it with the same name and the updated API key.
-You _must_ have either an `api_key` or an `entra_id` defined.
-If neither are present, an error will occur.
-
-`resource_name`:::
-(Required, string)
-The name of your Azure OpenAI resource.
-You can find this from the https://portal.azure.com/#view/HubsExtension/BrowseAll[list of resources] in the Azure Portal for your subscription.
-
-`deployment_id`:::
-(Required, string)
-The deployment name of your deployed models.
-Your Azure OpenAI deployments can be found though the https://oai.azure.com/[Azure OpenAI Studio] portal that is linked to your subscription.
-
-`api_version`:::
-(Required, string)
-The Azure API version ID to use.
-We recommend using the https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings[latest supported non-preview version].
-
-`rate_limit`:::
-(Optional, object)
-The `azureopenai` service sets a default number of requests allowed per minute depending on the task type.
-For `text_embedding` it is set to `1440`.
-For `completion` it is set to `120`.
-This helps to minimize the number of rate limit errors returned from Azure.
-To modify this, set the `requests_per_minute` setting of this object in your service settings:
-+
-[source,text]
-----
-"rate_limit": {
- "requests_per_minute": <>
-}
-----
-+
-More information about the rate limits for Azure can be found in the https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits[Quota limits docs] and https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/quota?tabs=rest[How to change the quotas].
-=====
-+
-.`service_settings` for the `cohere` service
-[%collapsible%closed]
-=====
-`api_key`:::
-(Required, string)
-A valid API key of your Cohere account.
-You can find your Cohere API keys or you can create a new one
-https://dashboard.cohere.com/api-keys[on the API keys settings page].
-
-IMPORTANT: You need to provide the API key only once, during the {infer} model creation.
-The <> does not retrieve your API key.
-After creating the {infer} model, you cannot change the associated API key.
-If you want to use a different API key, delete the {infer} model and recreate it with the same name and the updated API key.
-
-`embedding_type`::
-(Optional, string)
-Only for `text_embedding`.
-Specifies the types of embeddings you want to get back.
-Defaults to `float`.
-Valid values are:
-* `byte`: use it for signed int8 embeddings (this is a synonym of `int8`).
-* `float`: use it for the default float embeddings.
-* `int8`: use it for signed int8 embeddings.
-
-`model_id`::
-(Optional, string)
-The name of the model to use for the {infer} task.
-To review the available `rerank` models, refer to the
-https://docs.cohere.com/reference/rerank-1[Cohere docs].
-
-To review the available `text_embedding` models, refer to the
-https://docs.cohere.com/reference/embed[Cohere docs].
-The default value for
-`text_embedding` is `embed-english-v2.0`.
-
-`rate_limit`:::
-(Optional, object)
-By default, the `cohere` service sets the number of requests allowed per minute to `10000`.
-This value is the same for all task types.
-This helps to minimize the number of rate limit errors returned from Cohere.
-To modify this, set the `requests_per_minute` setting of this object in your service settings:
-+
-[source,text]
-----
-"rate_limit": {
- "requests_per_minute": <>
-}
-----
-+
-More information about Cohere's rate limits can be found in https://docs.cohere.com/docs/going-live#production-key-specifications[Cohere's production key docs].
-
-=====
-+
-.`service_settings` for the `elasticsearch` service
-[%collapsible%closed]
-=====
-
-`model_id`:::
-(Required, string)
-The name of the model to use for the {infer} task.
-It can be the ID of either a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model already
-{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland].
-
-`num_allocations`:::
-(Required, integer)
-The number of model allocations to create. `num_allocations` must not exceed the number of available processors per node divided by the `num_threads`.
-
-`num_threads`:::
-(Required, integer)
-The number of threads to use by each model allocation. `num_threads` must not exceed the number of available processors per node divided by the number of allocations.
-Must be a power of 2. Max allowed value is 32.
-
-=====
-+
-.`service_settings` for the `elser` service
-[%collapsible%closed]
-=====
-
-`num_allocations`:::
-(Required, integer)
-The number of model allocations to create. `num_allocations` must not exceed the number of available processors per node divided by the `num_threads`.
-
-`num_threads`:::
-(Required, integer)
-The number of threads to use by each model allocation. `num_threads` must not exceed the number of available processors per node divided by the number of allocations.
-Must be a power of 2. Max allowed value is 32.
-
-=====
-+
-.`service_settings` for the `googleiastudio` service
-[%collapsible%closed]
-=====
-
-`api_key`:::
-(Required, string)
-A valid API key for the Google Gemini API.
-
-`model_id`:::
-(Required, string)
-The name of the model to use for the {infer} task.
-You can find the supported models at https://ai.google.dev/gemini-api/docs/models/gemini[Gemini API models].
-
-`rate_limit`:::
-(Optional, object)
-By default, the `googleaistudio` service sets the number of requests allowed per minute to `360`.
-This helps to minimize the number of rate limit errors returned from Google AI Studio.
-To modify this, set the `requests_per_minute` setting of this object in your service settings:
-+
---
-[source,text]
-----
-"rate_limit": {
- "requests_per_minute": <>
-}
-----
---
-
-=====
-+
-.`service_settings` for the `hugging_face` service
-[%collapsible%closed]
-=====
-
-`api_key`:::
-(Required, string)
-A valid access token of your Hugging Face account.
-You can find your Hugging Face access tokens or you can create a new one
-https://huggingface.co/settings/tokens[on the settings page].
-
-IMPORTANT: You need to provide the API key only once, during the {infer} model creation.
-The <> does not retrieve your API key.
-After creating the {infer} model, you cannot change the associated API key.
-If you want to use a different API key, delete the {infer} model and recreate it with the same name and the updated API key.
-
-`url`:::
-(Required, string)
-The URL endpoint to use for the requests.
-
-`rate_limit`:::
-(Optional, object)
-By default, the `huggingface` service sets the number of requests allowed per minute to `3000`.
-This helps to minimize the number of rate limit errors returned from Hugging Face.
-To modify this, set the `requests_per_minute` setting of this object in your service settings:
-+
-[source,text]
-----
-"rate_limit": {
- "requests_per_minute": <>
-}
-----
-
-=====
-+
-.`service_settings` for the `mistral` service
-[%collapsible%closed]
-=====
-
-`api_key`:::
-(Required, string)
-A valid API key for your Mistral account.
-You can find your Mistral API keys or you can create a new one
-https://console.mistral.ai/api-keys/[on the API Keys page].
-
-`model`:::
-(Required, string)
-The name of the model to use for the {infer} task.
-Refer to the https://docs.mistral.ai/getting-started/models/[Mistral models documentation]
-for the list of available text embedding models.
-
-`max_input_tokens`:::
-(Optional, integer)
-Allows you to specify the maximum number of tokens per input before chunking occurs.
-
-`rate_limit`:::
-(Optional, object)
-By default, the `mistral` service sets the number of requests allowed per minute to `240`.
-This helps to minimize the number of rate limit errors returned from the Mistral API.
-To modify this, set the `requests_per_minute` setting of this object in your service settings:
-+
-[source,text]
-----
-"rate_limit": {
- "requests_per_minute": <>
-}
-----
-
-=====
-+
-.`service_settings` for the `openai` service
-[%collapsible%closed]
-=====
-
-`api_key`:::
-(Required, string)
-A valid API key of your OpenAI account.
-You can find your OpenAI API keys in your OpenAI account under the
-https://platform.openai.com/api-keys[API keys section].
-
-IMPORTANT: You need to provide the API key only once, during the {infer} model creation.
-The <> does not retrieve your API key.
-After creating the {infer} model, you cannot change the associated API key.
-If you want to use a different API key, delete the {infer} model and recreate it with the same name and the updated API key.
-
-`model_id`:::
-(Required, string)
-The name of the model to use for the {infer} task.
-Refer to the
-https://platform.openai.com/docs/guides/embeddings/what-are-embeddings[OpenAI documentation]
-for the list of available text embedding models.
-
-`organization_id`:::
-(Optional, string)
-The unique identifier of your organization.
-You can find the Organization ID in your OpenAI account under
-https://platform.openai.com/account/organization[**Settings** > **Organizations**].
-
-`url`:::
-(Optional, string)
-The URL endpoint to use for the requests.
-Can be changed for testing purposes.
-Defaults to `https://api.openai.com/v1/embeddings`.
-
-`rate_limit`:::
-(Optional, object)
-The `openai` service sets a default number of requests allowed per minute depending on the task type.
-For `text_embedding` it is set to `3000`.
-For `completion` it is set to `500`.
-This helps to minimize the number of rate limit errors returned from Azure.
-To modify this, set the `requests_per_minute` setting of this object in your service settings:
-+
-[source,text]
-----
-"rate_limit": {
- "requests_per_minute": <>
-}
-----
-+
-More information about the rate limits for OpenAI can be found in your https://platform.openai.com/account/limits[Account limits].
-
-=====
-
-`task_settings`::
-(Optional, object)
-Settings to configure the {infer} task.
-These settings are specific to the
-`` you specified.
-+
-.`task_settings` for the `completion` task type
-[%collapsible%closed]
-=====
-
-`do_sample`:::
-(Optional, float)
-For the `azureaistudio` service only.
-Instructs the inference process to perform sampling or not.
-Has not affect unless `temperature` or `top_p` is specified.
-
-`max_new_tokens`:::
-(Optional, integer)
-For the `azureaistudio` service only.
-Provides a hint for the maximum number of output tokens to be generated.
-Defaults to 64.
-
-`user`:::
-(Optional, string)
-For `openai` service only.
-Specifies the user issuing the request, which can be used for abuse detection.
-
-`temperature`:::
-(Optional, float)
-For the `azureaistudio` service only.
-A number in the range of 0.0 to 2.0 that specifies the sampling temperature to use that controls the apparent creativity of generated completions.
-Should not be used if `top_p` is specified.
-
-`top_p`:::
-(Optional, float)
-For the `azureaistudio` service only.
-A number in the range of 0.0 to 2.0 that is an alternative value to temperature that causes the model to consider the results of the tokens with nucleus sampling probability.
-Should not be used if `temperature` is specified.
-
-=====
-+
-.`task_settings` for the `rerank` task type
-[%collapsible%closed]
-=====
-
-`return_documents`::
-(Optional, boolean)
-For `cohere` service only.
-Specify whether to return doc text within the results.
-
-`top_n`::
-(Optional, integer)
-The number of most relevant documents to return, defaults to the number of the documents.
-
-=====
-+
-.`task_settings` for the `text_embedding` task type
-[%collapsible%closed]
-=====
-
-`input_type`:::
-(Optional, string)
-For `cohere` service only.
-Specifies the type of input passed to the model.
-Valid values are:
-* `classification`: use it for embeddings passed through a text classifier.
-* `clusterning`: use it for the embeddings run through a clustering algorithm.
-* `ingest`: use it for storing document embeddings in a vector database.
-* `search`: use it for storing embeddings of search queries run against a vector database to find relevant documents.
-+
-IMPORTANT: The `input_type` field is required when using embedding models `v3` and higher.
-
-`truncate`:::
-(Optional, string)
-For `cohere` service only.
-Specifies how the API handles inputs longer than the maximum token length.
-Defaults to `END`.
-Valid values are:
-* `NONE`: when the input exceeds the maximum input token length an error is returned.
-* `START`: when the input exceeds the maximum input token length the start of the input is discarded.
-* `END`: when the input exceeds the maximum input token length the end of the input is discarded.
-
-`user`:::
-(optional, string)
-For `openai`, `azureopenai` and `azureaistudio` services only.
-Specifies the user issuing the request, which can be used for abuse detection.
-
-=====
-[discrete]
-[[put-inference-api-example]]
-==== {api-examples-title}
-
-This section contains example API calls for every service type.
-
-[discrete]
-[[inference-example-azureaistudio]]
-===== Azure AI Studio service
-
-The following example shows how to create an {infer} endpoint called
-`azure_ai_studio_embeddings` to perform a `text_embedding` task type.
-Note that we do not specify a model here, as it is defined already via our Azure AI Studio deployment.
-
-The list of embeddings models that you can choose from in your deployment can be found in the https://ai.azure.com/explore/models?selectedTask=embeddings[Azure AI Studio model explorer].
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/text_embedding/azure_ai_studio_embeddings
-{
- "service": "azureaistudio",
- "service_settings": {
- "api_key": "",
- "target": "",
- "provider": "",
- "endpoint_type": ""
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-
-The next example shows how to create an {infer} endpoint called
-`azure_ai_studio_completion` to perform a `completion` task type.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/completion/azure_ai_studio_completion
-{
- "service": "azureaistudio",
- "service_settings": {
- "api_key": "",
- "target": "",
- "provider": "",
- "endpoint_type": ""
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-
-The list of chat completion models that you can choose from in your deployment can be found in the https://ai.azure.com/explore/models?selectedTask=chat-completion[Azure AI Studio model explorer].
-
-[discrete]
-[[inference-example-azureopenai]]
-===== Azure OpenAI service
-
-The following example shows how to create an {infer} endpoint called
-`azure_openai_embeddings` to perform a `text_embedding` task type.
-Note that we do not specify a model here, as it is defined already via our Azure OpenAI deployment.
-
-The list of embeddings models that you can choose from in your deployment can be found in the https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#embeddings[Azure models documentation].
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/text_embedding/azure_openai_embeddings
-{
- "service": "azureopenai",
- "service_settings": {
- "api_key": "",
- "resource_name": "",
- "deployment_id": "",
- "api_version": "2024-02-01"
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-
-The next example shows how to create an {infer} endpoint called
-`azure_openai_completion` to perform a `completion` task type.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/completion/azure_openai_completion
-{
- "service": "azureopenai",
- "service_settings": {
- "api_key": "",
- "resource_name": "",
- "deployment_id": "",
- "api_version": "2024-02-01"
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-
-The list of chat completion models that you can choose from in your Azure OpenAI deployment can be found at the following places:
-
-* https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-and-gpt-4-turbo-models[GPT-4 and GPT-4 Turbo models]
-* https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-35[GPT-3.5]
-
-[discrete]
-[[inference-example-cohere]]
-===== Cohere service
-
-The following example shows how to create an {infer} endpoint called
-`cohere-embeddings` to perform a `text_embedding` task type.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/text_embedding/cohere-embeddings
-{
- "service": "cohere",
- "service_settings": {
- "api_key": "",
- "model_id": "embed-english-light-v3.0",
- "embedding_type": "byte"
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-
-
-The following example shows how to create an {infer} endpoint called
-`cohere-rerank` to perform a `rerank` task type.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/rerank/cohere-rerank
-{
- "service": "cohere",
- "service_settings": {
- "api_key": "",
- "model_id": "rerank-english-v3.0"
- },
- "task_settings": {
- "top_n": 10,
- "return_documents": true
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-
-For more examples, also review the
-https://docs.cohere.com/docs/elasticsearch-and-cohere#rerank-search-results-with-cohere-and-elasticsearch[Cohere documentation].
-
-[discrete]
-[[inference-example-e5]]
-===== E5 via the `elasticsearch` service
-
-The following example shows how to create an {infer} endpoint called
-`my-e5-model` to perform a `text_embedding` task type.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/text_embedding/my-e5-model
-{
- "service": "elasticsearch",
- "service_settings": {
- "num_allocations": 1,
- "num_threads": 1,
- "model_id": ".multilingual-e5-small" <1>
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-<1> The `model_id` must be the ID of one of the built-in E5 models.
-Valid values are `.multilingual-e5-small` and `.multilingual-e5-small_linux-x86_64`.
-For further details, refer to the {ml-docs}/ml-nlp-e5.html[E5 model documentation].
-
-[discrete]
-[[inference-example-elser]]
-===== ELSER service
-
-The following example shows how to create an {infer} endpoint called
-`my-elser-model` to perform a `sparse_embedding` task type.
-Refer to the {ml-docs}/ml-nlp-elser.html[ELSER model documentation] for more info.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/sparse_embedding/my-elser-model
-{
- "service": "elser",
- "service_settings": {
- "num_allocations": 1,
- "num_threads": 1
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-
-
-Example response:
-
-[source,console-result]
-------------------------------------------------------------
-{
- "inference_id": "my-elser-model",
- "task_type": "sparse_embedding",
- "service": "elser",
- "service_settings": {
- "num_allocations": 1,
- "num_threads": 1
- },
- "task_settings": {}
-}
-------------------------------------------------------------
-// NOTCONSOLE
-
-
-[discrete]
-[[inference-example-googleaistudio]]
-===== Google AI Studio service
-
-The following example shows how to create an {infer} endpoint called
-`google_ai_studio_completion` to perform a `completion` task type.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/completion/google_ai_studio_completion
-{
- "service": "googleaistudio",
- "service_settings": {
- "api_key": "",
- "model_id": ""
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-
-
-[discrete]
-[[inference-example-hugging-face]]
-===== Hugging Face service
-
-The following example shows how to create an {infer} endpoint called
-`hugging-face-embeddings` to perform a `text_embedding` task type.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/text_embedding/hugging-face-embeddings
-{
- "service": "hugging_face",
- "service_settings": {
- "api_key": "", <1>
- "url": "" <2>
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-<1> A valid Hugging Face access token.
-You can find on the
-https://huggingface.co/settings/tokens[settings page of your account].
-<2> The {infer} endpoint URL you created on Hugging Face.
-
-Create a new {infer} endpoint on
-https://ui.endpoints.huggingface.co/[the Hugging Face endpoint page] to get an endpoint URL.
-Select the model you want to use on the new endpoint creation page - for example `intfloat/e5-small-v2` - then select the `Sentence Embeddings`
-task under the Advanced configuration section.
-Create the endpoint.
-Copy the URL after the endpoint initialization has been finished.
-
-[discrete]
-[[inference-example-hugging-face-supported-models]]
-The list of recommended models for the Hugging Face service:
-
-* https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2[all-MiniLM-L6-v2]
-* https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2[all-MiniLM-L12-v2]
-* https://huggingface.co/sentence-transformers/all-mpnet-base-v2[all-mpnet-base-v2]
-* https://huggingface.co/intfloat/e5-base-v2[e5-base-v2]
-* https://huggingface.co/intfloat/e5-small-v2[e5-small-v2]
-* https://huggingface.co/intfloat/multilingual-e5-base[multilingual-e5-base]
-* https://huggingface.co/intfloat/multilingual-e5-small[multilingual-e5-small]
-
-[discrete]
-[[inference-example-eland]]
-===== Models uploaded by Eland via the elasticsearch service
-
-The following example shows how to create an {infer} endpoint called
-`my-msmarco-minilm-model` to perform a `text_embedding` task type.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/text_embedding/my-msmarco-minilm-model
-{
- "service": "elasticsearch",
- "service_settings": {
- "num_allocations": 1,
- "num_threads": 1,
- "model_id": "msmarco-MiniLM-L12-cos-v5" <1>
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-<1> The `model_id` must be the ID of a text embedding model which has already been
-{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland].
-
-[discrete]
-[[inference-example-mistral]]
-===== Mistral Service
-
-The following example shows how to create an {infer} endpoint called
-`mistral-embeddings-test` to perform a `text_embedding` task type.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/text_embedding/mistral-embeddings-test
-{
- "service": "mistral",
- "service_settings": {
- "api_key": "",
- "model": "mistral-embed" <1>
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
-<1> The `model` must be the ID of a text embedding model which can be found in the
-https://docs.mistral.ai/getting-started/models/[Mistral models documentation]
-
-[discrete]
-[[inference-example-openai]]
-===== OpenAI service
-
-The following example shows how to create an {infer} endpoint called
-`openai-embeddings` to perform a `text_embedding` task type.
-
-[source,console]
-------------------------------------------------------------
-PUT _inference/text_embedding/openai-embeddings
-{
- "service": "openai",
- "service_settings": {
- "api_key": "",
- "model_id": "text-embedding-ada-002"
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
+The create {infer} API enables you to create an {infer} endpoint and configure a {ml} model to perform a specific {infer} task.
-The next example shows how to create an {infer} endpoint called
-`openai-completion` to perform a `completion` task type.
+The following services are available through the {infer} API. Click the links to review the configuration details of each service:
-[source,console]
-------------------------------------------------------------
-PUT _inference/completion/openai-completion
-{
- "service": "openai",
- "service_settings": {
- "api_key": "",
- "model_id": "gpt-3.5-turbo"
- }
-}
-------------------------------------------------------------
-// TEST[skip:TBD]
+* <<infer-service-azure-ai-studio,Azure AI Studio>>
+* <<infer-service-azure-openai,Azure OpenAI>>
+* <<infer-service-cohere,Cohere>>
+* <<infer-service-elasticsearch,Elasticsearch>> (for built-in models and models uploaded through Eland)
+* <<infer-service-elser,ELSER>>
+* <<infer-service-google-ai-studio,Google AI Studio>>
+* <<infer-service-google-vertex-ai,Google Vertex AI>>
+* <<infer-service-hugging-face,Hugging Face>>
+* <<infer-service-mistral,Mistral>>
+* <<infer-service-openai,OpenAI>>
diff --git a/docs/reference/inference/service-azure-ai-studio.asciidoc b/docs/reference/inference/service-azure-ai-studio.asciidoc
new file mode 100644
index 0000000000000..0d711a0d6171f
--- /dev/null
+++ b/docs/reference/inference/service-azure-ai-studio.asciidoc
@@ -0,0 +1,173 @@
+[[infer-service-azure-ai-studio]]
+=== Azure AI studio {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `azureaistudio` service.
+
+
+[discrete]
+[[infer-service-azure-ai-studio-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-azure-ai-studio-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `completion`,
+* `text_embedding`.
+--
+
+[discrete]
+[[infer-service-azure-ai-studio-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string)
+The type of service supported for the specified task type. In this case,
+`azureaistudio`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `azureaistudio` service.
+--
+
+`api_key`:::
+(Required, string)
+A valid API key of your Azure AI Studio model deployment.
+This key can be found on the overview page for your deployment in the management section of your https://ai.azure.com/[Azure AI Studio] account.
++
+--
+include::inference-shared.asciidoc[tag=api-key-admonition]
+--
+
+`target`:::
+(Required, string)
+The target URL of your Azure AI Studio model deployment.
+This can be found on the overview page for your deployment in the management section of your https://ai.azure.com/[Azure AI Studio] account.
+
+`provider`:::
+(Required, string)
+The model provider for your deployment.
+Note that some providers may support only certain task types.
+Supported providers include:
+
+* `cohere` - available for `text_embedding` and `completion` task types
+* `databricks` - available for `completion` task type only
+* `meta` - available for `completion` task type only
+* `microsoft_phi` - available for `completion` task type only
+* `mistral` - available for `completion` task type only
+* `openai` - available for `text_embedding` and `completion` task types
+
+`endpoint_type`:::
+(Required, string)
+One of `token` or `realtime`.
+Specifies the type of endpoint that is used in your model deployment.
+There are https://learn.microsoft.com/en-us/azure/ai-studio/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio[two endpoint types available] for deployment through Azure AI Studio.
+"Pay as you go" endpoints are billed per token.
+For these, you must specify `token` for your `endpoint_type`.
+For "real-time" endpoints which are billed per hour of usage, specify `realtime`.
+
+`rate_limit`:::
+(Optional, object)
+By default, the `azureaistudio` service sets the number of requests allowed per minute to `240`.
+This helps to minimize the number of rate limit errors returned from Azure AI Studio.
+To modify this, set the `requests_per_minute` setting of this object in your service settings:
++
+--
+include::inference-shared.asciidoc[tag=request-per-minute-example]
+--
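+
+As an illustrative sketch (the endpoint name and rate are hypothetical, and the placeholder values must be replaced with your deployment details), a lowered rate limit is set like this:
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/azure_ai_studio_embeddings_throttled
+{
+ "service": "azureaistudio",
+ "service_settings": {
+ "api_key": "<api_key>",
+ "target": "<target_uri>",
+ "provider": "<model_provider>",
+ "endpoint_type": "<endpoint_type>",
+ "rate_limit": {
+ "requests_per_minute": 120
+ }
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]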
+
+`task_settings`::
+(Optional, object)
+include::inference-shared.asciidoc[tag=task-settings]
++
+.`task_settings` for the `completion` task type
+[%collapsible%closed]
+=====
+`do_sample`:::
+(Optional, float)
+Indicates whether the inference process performs sampling.
+Has no effect unless `temperature` or `top_p` is specified.
+
+`max_new_tokens`:::
+(Optional, integer)
+Provides a hint for the maximum number of output tokens to be generated.
+Defaults to 64.
+
+`temperature`:::
+(Optional, float)
+A number in the range of 0.0 to 2.0 that specifies the sampling temperature, which controls the apparent creativity of generated completions.
+Should not be used if `top_p` is specified.
+
+`top_p`:::
+(Optional, float)
+A number in the range of 0.0 to 2.0 that is an alternative to `temperature`; it causes the model to consider the results of the tokens with nucleus sampling probability.
+Should not be used if `temperature` is specified.
+=====
++
+.`task_settings` for the `text_embedding` task type
+[%collapsible%closed]
+=====
+`user`:::
+(Optional, string)
+Specifies the user issuing the request, which can be used for abuse detection.
+=====
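+
+As an illustrative sketch of how the sampling settings above fit together (the endpoint name and values are hypothetical), a `completion` endpoint can set a temperature and a token budget through `task_settings`:
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/completion/azure_ai_studio_completion_sampled
+{
+ "service": "azureaistudio",
+ "service_settings": {
+ "api_key": "<api_key>",
+ "target": "<target_uri>",
+ "provider": "<model_provider>",
+ "endpoint_type": "<endpoint_type>"
+ },
+ "task_settings": {
+ "temperature": 0.7,
+ "max_new_tokens": 128
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]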
+
+
+[discrete]
+[[inference-example-azureaistudio]]
+==== Azure AI Studio service example
+
+The following example shows how to create an {infer} endpoint called `azure_ai_studio_embeddings` to perform a `text_embedding` task type.
+Note that we do not specify a model here, as it is defined already via our Azure AI Studio deployment.
+
+The list of embeddings models that you can choose from in your deployment can be found in the https://ai.azure.com/explore/models?selectedTask=embeddings[Azure AI Studio model explorer].
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/azure_ai_studio_embeddings
+{
+ "service": "azureaistudio",
+ "service_settings": {
+ "api_key": "",
+ "target": "",
+ "provider": "",
+ "endpoint_type": ""
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+The next example shows how to create an {infer} endpoint called `azure_ai_studio_completion` to perform a `completion` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/completion/azure_ai_studio_completion
+{
+ "service": "azureaistudio",
+ "service_settings": {
+ "api_key": "",
+ "target": "",
+ "provider": "",
+ "endpoint_type": ""
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+The list of chat completion models that you can choose from in your deployment can be found in the https://ai.azure.com/explore/models?selectedTask=chat-completion[Azure AI Studio model explorer].
diff --git a/docs/reference/inference/service-azure-openai.asciidoc b/docs/reference/inference/service-azure-openai.asciidoc
new file mode 100644
index 0000000000000..6f03c5966d9e6
--- /dev/null
+++ b/docs/reference/inference/service-azure-openai.asciidoc
@@ -0,0 +1,156 @@
+[[infer-service-azure-openai]]
+=== Azure OpenAI {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `azureopenai` service.
+
+
+[discrete]
+[[infer-service-azure-openai-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-azure-openai-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `completion`,
+* `text_embedding`.
+--
+
+[discrete]
+[[infer-service-azure-openai-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string)
+The type of service supported for the specified task type. In this case,
+`azureopenai`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `azureopenai` service.
+--
+
+`api_key` or `entra_id`:::
+(Required, string)
+You must provide _either_ an API key or an Entra ID.
+If you do not provide either, or provide both, you will receive an error when trying to create your model.
+See the https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication[Azure OpenAI Authentication documentation] for more details on these authentication types.
++
+--
+include::inference-shared.asciidoc[tag=api-key-admonition]
+--
+
+`resource_name`:::
+(Required, string)
+The name of your Azure OpenAI resource.
+You can find this from the https://portal.azure.com/#view/HubsExtension/BrowseAll[list of resources] in the Azure Portal for your subscription.
+
+`deployment_id`:::
+(Required, string)
+The deployment name of your deployed models.
+Your Azure OpenAI deployments can be found through the https://oai.azure.com/[Azure OpenAI Studio] portal that is linked to your subscription.
+
+`api_version`:::
+(Required, string)
+The Azure API version ID to use.
+We recommend using the https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings[latest supported non-preview version].
+
+`rate_limit`:::
+(Optional, object)
+The `azureopenai` service sets a default number of requests allowed per minute depending on the task type.
+For `text_embedding` it is set to `1440`.
+For `completion` it is set to `120`.
+This helps to minimize the number of rate limit errors returned from Azure.
+To modify this, set the `requests_per_minute` setting of this object in your service settings:
++
+--
+include::inference-shared.asciidoc[tag=request-per-minute-example]
+
+More information about the rate limits for Azure can be found in the https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits[Quota limits docs] and https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/quota?tabs=rest[How to change the quotas].
+--
+
+`task_settings`::
+(Optional, object)
+include::inference-shared.asciidoc[tag=task-settings]
++
+.`task_settings` for the `completion` task type
+[%collapsible%closed]
+=====
+`user`:::
+(Optional, string)
+Specifies the user issuing the request, which can be used for abuse detection.
+=====
++
+.`task_settings` for the `text_embedding` task type
+[%collapsible%closed]
+=====
+`user`:::
+(Optional, string)
+Specifies the user issuing the request, which can be used for abuse detection.
+=====
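+
+As an illustrative sketch (the endpoint name and `user` value are hypothetical), the `user` setting is passed through `task_settings` in the same way for both task types:
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/completion/azure_openai_completion_with_user
+{
+ "service": "azureopenai",
+ "service_settings": {
+ "api_key": "<api_key>",
+ "resource_name": "<resource_name>",
+ "deployment_id": "<deployment_id>",
+ "api_version": "2024-02-01"
+ },
+ "task_settings": {
+ "user": "user-1234"
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]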
+
+
+
+[discrete]
+[[inference-example-azure-openai]]
+==== Azure OpenAI service example
+
+The following example shows how to create an {infer} endpoint called
+`azure_openai_embeddings` to perform a `text_embedding` task type.
+Note that we do not specify a model here, as it is defined already via our Azure OpenAI deployment.
+
+The list of embeddings models that you can choose from in your deployment can be found in the https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#embeddings[Azure models documentation].
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/azure_openai_embeddings
+{
+ "service": "azureopenai",
+ "service_settings": {
+ "api_key": "",
+ "resource_name": "",
+ "deployment_id": "",
+ "api_version": "2024-02-01"
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+The next example shows how to create an {infer} endpoint called
+`azure_openai_completion` to perform a `completion` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/completion/azure_openai_completion
+{
+ "service": "azureopenai",
+ "service_settings": {
+ "api_key": "",
+ "resource_name": "",
+ "deployment_id": "",
+ "api_version": "2024-02-01"
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+The list of chat completion models that you can choose from in your Azure OpenAI deployment can be found at the following places:
+
+* https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-and-gpt-4-turbo-models[GPT-4 and GPT-4 Turbo models]
+* https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-35[GPT-3.5]
\ No newline at end of file
diff --git a/docs/reference/inference/service-cohere.asciidoc b/docs/reference/inference/service-cohere.asciidoc
new file mode 100644
index 0000000000000..52d71e0bc02a5
--- /dev/null
+++ b/docs/reference/inference/service-cohere.asciidoc
@@ -0,0 +1,204 @@
+[[infer-service-cohere]]
+=== Cohere {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `cohere` service.
+
+
+[discrete]
+[[infer-service-cohere-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-cohere-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `completion`,
+* `rerank`,
+* `text_embedding`.
+--
+
+[discrete]
+[[infer-service-cohere-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string)
+The type of service supported for the specified task type. In this case,
+`cohere`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `cohere` service.
+--
+
+`api_key`:::
+(Required, string)
+A valid API key of your Cohere account.
+You can find your Cohere API keys or you can create a new one
+https://dashboard.cohere.com/api-keys[on the API keys settings page].
++
+--
+include::inference-shared.asciidoc[tag=api-key-admonition]
+--
+
+`rate_limit`:::
+(Optional, object)
+By default, the `cohere` service sets the number of requests allowed per minute to `10000`.
+This value is the same for all task types.
+This helps to minimize the number of rate limit errors returned from Cohere.
+To modify this, set the `requests_per_minute` setting of this object in your service settings:
++
+--
+include::inference-shared.asciidoc[tag=request-per-minute-example]
+
+More information about Cohere's rate limits can be found in https://docs.cohere.com/docs/going-live#production-key-specifications[Cohere's production key docs].
+--
++
+.`service_settings` for the `completion` task type
+[%collapsible%closed]
+=====
+`model_id`::
+(Optional, string)
+The name of the model to use for the {infer} task.
+To review the available `completion` models, refer to the
+https://docs.cohere.com/docs/models#command[Cohere docs].
+=====
++
+.`service_settings` for the `rerank` task type
+[%collapsible%closed]
+=====
+`model_id`::
+(Optional, string)
+The name of the model to use for the {infer} task.
+To review the available `rerank` models, refer to the
+https://docs.cohere.com/reference/rerank-1[Cohere docs].
+=====
++
+.`service_settings` for the `text_embedding` task type
+[%collapsible%closed]
+=====
+`embedding_type`:::
+(Optional, string)
+Specifies the types of embeddings you want to get back.
+Defaults to `float`.
+Valid values are:
+* `byte`: use it for signed int8 embeddings (this is a synonym of `int8`).
+* `float`: use it for the default float embeddings.
+* `int8`: use it for signed int8 embeddings.
+
+`model_id`:::
+(Optional, string)
+The name of the model to use for the {infer} task.
+To review the available `text_embedding` models, refer to the
+https://docs.cohere.com/reference/embed[Cohere docs].
+The default value for `text_embedding` is `embed-english-v2.0`.
+
+`similarity`:::
+(Optional, string)
+Similarity measure. One of `cosine`, `dot_product`, `l2_norm`.
+Defaults based on the `embedding_type` (`float` -> `dot_product`, `int8/byte` -> `cosine`).
+=====
+
+
+
+`task_settings`::
+(Optional, object)
+include::inference-shared.asciidoc[tag=task-settings]
++
+.`task_settings` for the `rerank` task type
+[%collapsible%closed]
+=====
+`return_documents`::
+(Optional, boolean)
+Specify whether to return doc text within the results.
+
+`top_n`::
+(Optional, integer)
+The number of most relevant documents to return. Defaults to the number of input documents.
+=====
++
+.`task_settings` for the `text_embedding` task type
+[%collapsible%closed]
+=====
+`input_type`:::
+(Optional, string)
+Specifies the type of input passed to the model.
+Valid values are:
+* `classification`: use it for embeddings passed through a text classifier.
+* `clustering`: use it for embeddings run through a clustering algorithm.
+* `ingest`: use it for storing document embeddings in a vector database.
+* `search`: use it for storing embeddings of search queries run against a vector database to find relevant documents.
++
+IMPORTANT: The `input_type` field is required when using embedding models `v3` and higher.
+
+`truncate`:::
+(Optional, string)
+Specifies how the API handles inputs longer than the maximum token length.
+Defaults to `END`.
+Valid values are:
+* `NONE`: when the input exceeds the maximum input token length, an error is returned.
+* `START`: when the input exceeds the maximum input token length, the start of the input is discarded.
+* `END`: when the input exceeds the maximum input token length, the end of the input is discarded.
+=====
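+
+As an illustrative sketch (the endpoint name is hypothetical), an endpoint intended for ingesting documents can combine the `embedding_type` service setting with the `input_type` and `truncate` task settings:
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/cohere-embeddings-for-ingest
+{
+ "service": "cohere",
+ "service_settings": {
+ "api_key": "<api_key>",
+ "model_id": "embed-english-v3.0",
+ "embedding_type": "int8"
+ },
+ "task_settings": {
+ "input_type": "ingest",
+ "truncate": "END"
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]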
+
+
+[discrete]
+[[inference-example-cohere]]
+==== Cohere service examples
+
+The following example shows how to create an {infer} endpoint called
+`cohere-embeddings` to perform a `text_embedding` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/cohere-embeddings
+{
+ "service": "cohere",
+ "service_settings": {
+ "api_key": "",
+ "model_id": "embed-english-light-v3.0",
+ "embedding_type": "byte"
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+
+The following example shows how to create an {infer} endpoint called
+`cohere-rerank` to perform a `rerank` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/rerank/cohere-rerank
+{
+ "service": "cohere",
+ "service_settings": {
+ "api_key": "",
+ "model_id": "rerank-english-v3.0"
+ },
+ "task_settings": {
+ "top_n": 10,
+ "return_documents": true
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+For more examples, also review the
+https://docs.cohere.com/docs/elasticsearch-and-cohere#rerank-search-results-with-cohere-and-elasticsearch[Cohere documentation].
\ No newline at end of file
diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc
new file mode 100644
index 0000000000000..3b9b5b1928d7b
--- /dev/null
+++ b/docs/reference/inference/service-elasticsearch.asciidoc
@@ -0,0 +1,122 @@
+[[infer-service-elasticsearch]]
+=== Elasticsearch {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `elasticsearch` service.
+
+
+[discrete]
+[[infer-service-elasticsearch-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-elasticsearch-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `rerank`,
+* `text_embedding`.
+--
+
+[discrete]
+[[infer-service-elasticsearch-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string)
+The type of service supported for the specified task type. In this case,
+`elasticsearch`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `elasticsearch` service.
+--
+
+`model_id`:::
+(Required, string)
+The name of the model to use for the {infer} task.
+It can be the ID of either a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model already
+{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland].
+
+`num_allocations`:::
+(Required, integer)
+The total number of allocations this model is assigned across machine learning nodes. Increasing this value generally increases the throughput.
+
+`num_threads`:::
+(Required, integer)
+Sets the number of threads used by each model allocation during inference. Increasing this value generally increases the speed per inference request. The inference process is a compute-bound process; `num_threads` must not exceed the number of available allocated processors per node.
+Must be a power of 2. The maximum allowed value is 32.
+
+`task_settings`::
+(Optional, object)
+include::inference-shared.asciidoc[tag=task-settings]
++
+.`task_settings` for the `rerank` task type
+[%collapsible%closed]
+=====
+`return_documents`:::
+(Optional, Boolean)
+Returns the document instead of only the index. Defaults to `true`.
+=====
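+
+As an illustrative sketch (assuming a reranker model has already been deployed under the hypothetical ID `my-rerank-model`), a `rerank` endpoint can disable document return through `task_settings`:
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/rerank/my-rerank-endpoint
+{
+ "service": "elasticsearch",
+ "service_settings": {
+ "num_allocations": 1,
+ "num_threads": 1,
+ "model_id": "my-rerank-model"
+ },
+ "task_settings": {
+ "return_documents": false
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]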
+
+
+[discrete]
+[[inference-example-elasticsearch]]
+==== E5 via the `elasticsearch` service
+
+The following example shows how to create an {infer} endpoint called
+`my-e5-model` to perform a `text_embedding` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/my-e5-model
+{
+ "service": "elasticsearch",
+ "service_settings": {
+ "num_allocations": 1,
+ "num_threads": 1,
+ "model_id": ".multilingual-e5-small" <1>
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+<1> The `model_id` must be the ID of one of the built-in E5 models.
+Valid values are `.multilingual-e5-small` and `.multilingual-e5-small_linux-x86_64`.
+For further details, refer to the {ml-docs}/ml-nlp-e5.html[E5 model documentation].
+
+[discrete]
+[[inference-example-eland]]
+==== Models uploaded by Eland via the `elasticsearch` service
+
+The following example shows how to create an {infer} endpoint called
+`my-msmarco-minilm-model` to perform a `text_embedding` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/my-msmarco-minilm-model
+{
+ "service": "elasticsearch",
+ "service_settings": {
+ "num_allocations": 1,
+ "num_threads": 1,
+ "model_id": "msmarco-MiniLM-L12-cos-v5" <1>
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+<1> The `model_id` must be the ID of a text embedding model which has already been
+{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland].
\ No newline at end of file
diff --git a/docs/reference/inference/service-elser.asciidoc b/docs/reference/inference/service-elser.asciidoc
new file mode 100644
index 0000000000000..829ff4968c5be
--- /dev/null
+++ b/docs/reference/inference/service-elser.asciidoc
@@ -0,0 +1,95 @@
+[[infer-service-elser]]
+=== ELSER {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `elser` service.
+
+
+[discrete]
+[[infer-service-elser-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-elser-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `sparse_embedding`.
+--
+
+[discrete]
+[[infer-service-elser-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string)
+The type of service supported for the specified task type. In this case,
+`elser`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `elser` service.
+--
+
+`num_allocations`:::
+(Required, integer)
+The total number of allocations this model is assigned across machine learning nodes. Increasing this value generally increases the throughput.
+
+`num_threads`:::
+(Required, integer)
+Sets the number of threads used by each model allocation during inference. Increasing this value generally increases the speed per inference request. The inference process is a compute-bound process; `num_threads` must not exceed the number of available allocated processors per node.
+Must be a power of 2. The maximum allowed value is 32.
+
+
+[discrete]
+[[inference-example-elser]]
+==== ELSER service example
+
+The following example shows how to create an {infer} endpoint called
+`my-elser-model` to perform a `sparse_embedding` task type.
+Refer to the {ml-docs}/ml-nlp-elser.html[ELSER model documentation] for more info.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/sparse_embedding/my-elser-model
+{
+ "service": "elser",
+ "service_settings": {
+ "num_allocations": 1,
+ "num_threads": 1
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+
+Example response:
+
+[source,console-result]
+------------------------------------------------------------
+{
+ "inference_id": "my-elser-model",
+ "task_type": "sparse_embedding",
+ "service": "elser",
+ "service_settings": {
+ "num_allocations": 1,
+ "num_threads": 1
+ },
+ "task_settings": {}
+}
+------------------------------------------------------------
+// NOTCONSOLE
\ No newline at end of file
diff --git a/docs/reference/inference/service-google-ai-studio.asciidoc b/docs/reference/inference/service-google-ai-studio.asciidoc
new file mode 100644
index 0000000000000..25aa89cd49110
--- /dev/null
+++ b/docs/reference/inference/service-google-ai-studio.asciidoc
@@ -0,0 +1,87 @@
+[[infer-service-google-ai-studio]]
+=== Google AI Studio {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `googleaistudio` service.
+
+
+[discrete]
+[[infer-service-google-ai-studio-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-google-ai-studio-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `completion`,
+* `text_embedding`.
+--
+
+[discrete]
+[[infer-service-google-ai-studio-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string)
+The type of service supported for the specified task type. In this case,
+`googleaistudio`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `googleaistudio` service.
+--
+
+`api_key`:::
+(Required, string)
+A valid API key for the Google Gemini API.
+
+`model_id`:::
+(Required, string)
+The name of the model to use for the {infer} task.
+You can find the supported models at https://ai.google.dev/gemini-api/docs/models/gemini[Gemini API models].
+
+`rate_limit`:::
+(Optional, object)
+By default, the `googleaistudio` service sets the number of requests allowed per minute to `360`.
+This helps to minimize the number of rate limit errors returned from Google AI Studio.
+To modify this, set the `requests_per_minute` setting of this object in your service settings:
++
+--
+include::inference-shared.asciidoc[tag=request-per-minute-example]
+--
+
+
+[discrete]
+[[inference-example-google-ai-studio]]
+==== Google AI Studio service example
+
+The following example shows how to create an {infer} endpoint called
+`google_ai_studio_completion` to perform a `completion` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/completion/google_ai_studio_completion
+{
+ "service": "googleaistudio",
+ "service_settings": {
+ "api_key": "",
+ "model_id": ""
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
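+
+Although only a `completion` example is shown here, a `text_embedding` endpoint follows the same shape (the endpoint name is illustrative, and the placeholders must be replaced with your own values):
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/google_ai_studio_embeddings
+{
+ "service": "googleaistudio",
+ "service_settings": {
+ "api_key": "<api_key>",
+ "model_id": "<model_id>"
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]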
\ No newline at end of file
diff --git a/docs/reference/inference/service-google-vertex-ai.asciidoc b/docs/reference/inference/service-google-vertex-ai.asciidoc
new file mode 100644
index 0000000000000..640553ab74626
--- /dev/null
+++ b/docs/reference/inference/service-google-vertex-ai.asciidoc
@@ -0,0 +1,135 @@
+[[infer-service-google-vertex-ai]]
+=== Google Vertex AI {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `googlevertexai` service.
+
+
+[discrete]
+[[infer-service-google-vertex-ai-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-google-vertex-ai-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `rerank`
+* `text_embedding`.
+--
+
+[discrete]
+[[infer-service-google-vertex-ai-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string)
+The type of service supported for the specified task type. In this case,
+`googlevertexai`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `googlevertexai` service.
+--
+
+`service_account_json`:::
+(Required, string)
+A valid service account in JSON format for the Google Vertex AI API.
+
+`model_id`:::
+(Required, string)
+The name of the model to use for the {infer} task.
+You can find the supported models at https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api[Text embeddings API].
+
+`location`:::
+(Required, string)
+The name of the location to use for the {infer} task.
+You can find the supported locations at https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations[Generative AI on Vertex AI locations].
+
+`project_id`:::
+(Required, string)
+The name of the project to use for the {infer} task.
+
+`rate_limit`:::
+(Optional, object)
+By default, the `googlevertexai` service sets the number of requests allowed per minute to `30000`.
+This helps to minimize the number of rate limit errors returned from Google Vertex AI.
+To modify this, set the `requests_per_minute` setting of this object in your service settings:
++
+--
+include::inference-shared.asciidoc[tag=request-per-minute-example]
+
+More information about the rate limits for Google Vertex AI can be found in the https://cloud.google.com/vertex-ai/docs/quotas[Google Vertex AI Quotas docs].
+--
+
+`task_settings`::
+(Optional, object)
+include::inference-shared.asciidoc[tag=task-settings]
++
+.`task_settings` for the `rerank` task type
+[%collapsible%closed]
+=====
+`top_n`:::
+(Optional, integer)
+Specifies the number of top documents to return.
+=====
++
+.`task_settings` for the `text_embedding` task type
+[%collapsible%closed]
+=====
+`auto_truncate`:::
+(Optional, boolean)
+Specifies whether the API automatically truncates inputs longer than the maximum token length.
+=====
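+
+As an illustrative sketch (the endpoint name is hypothetical, and the placeholders must be replaced with your project details), `auto_truncate` is passed through `task_settings`:
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/google_vertex_ai_embeddings_truncating
+{
+ "service": "googlevertexai",
+ "service_settings": {
+ "service_account_json": "<service_account_json>",
+ "model_id": "<model_id>",
+ "location": "<location>",
+ "project_id": "<project_id>"
+ },
+ "task_settings": {
+ "auto_truncate": true
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]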
+
+[discrete]
+[[inference-example-google-vertex-ai]]
+==== Google Vertex AI service example
+
+The following example shows how to create an {infer} endpoint called
+`google_vertex_ai_embeddings` to perform a `text_embedding` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/google_vertex_ai_embeddings
+{
+ "service": "googlevertexai",
+ "service_settings": {
+ "service_account_json": "",
+ "model_id": "",
+ "location": "",
+ "project_id": ""
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+The next example shows how to create an {infer} endpoint called
+`google_vertex_ai_rerank` to perform a `rerank` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/rerank/google_vertex_ai_rerank
+{
+ "service": "googlevertexai",
+ "service_settings": {
+ "service_account_json": "",
+ "project_id": ""
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
diff --git a/docs/reference/inference/service-hugging-face.asciidoc b/docs/reference/inference/service-hugging-face.asciidoc
new file mode 100644
index 0000000000000..177a15177d21f
--- /dev/null
+++ b/docs/reference/inference/service-hugging-face.asciidoc
@@ -0,0 +1,114 @@
+[[infer-service-hugging-face]]
+=== Hugging Face {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `hugging_face` service.
+
+
+[discrete]
+[[infer-service-hugging-face-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-hugging-face-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `text_embedding`.
+--
+
+[discrete]
+[[infer-service-hugging-face-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string)
+The type of service supported for the specified task type. In this case,
+`hugging_face`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `hugging_face` service.
+--
+
+`api_key`:::
+(Required, string)
+A valid access token of your Hugging Face account.
+You can find your Hugging Face access tokens or you can create a new one
+https://huggingface.co/settings/tokens[on the settings page].
++
+--
+include::inference-shared.asciidoc[tag=api-key-admonition]
+--
+
+`url`:::
+(Required, string)
+The URL endpoint to use for the requests.
+
+`rate_limit`:::
+(Optional, object)
+By default, the `hugging_face` service sets the number of requests allowed per minute to `3000`.
+This helps to minimize the number of rate limit errors returned from Hugging Face.
+To modify this, set the `requests_per_minute` setting of this object in your service settings:
++
+--
+include::inference-shared.asciidoc[tag=request-per-minute-example]
+--
+
+
+[discrete]
+[[inference-example-hugging-face]]
+==== Hugging Face service example
+
+The following example shows how to create an {infer} endpoint called
+`hugging-face-embeddings` to perform a `text_embedding` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/hugging-face-embeddings
+{
+ "service": "hugging_face",
+ "service_settings": {
+ "api_key": "", <1>
+ "url": "" <2>
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+<1> A valid Hugging Face access token.
+You can find it on the
+https://huggingface.co/settings/tokens[settings page of your account].
+<2> The {infer} endpoint URL you created on Hugging Face.
+
+Create a new {infer} endpoint on
+https://ui.endpoints.huggingface.co/[the Hugging Face endpoint page] to get an endpoint URL.
+Select the model you want to use on the new endpoint creation page (for example, `intfloat/e5-small-v2`), then select the `Sentence Embeddings`
+task under the Advanced configuration section.
+Create the endpoint.
+Copy the URL after the endpoint initialization is finished.
+
+[discrete]
+[[inference-example-hugging-face-supported-models]]
+The list of recommended models for the Hugging Face service:
+
+* https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2[all-MiniLM-L6-v2]
+* https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2[all-MiniLM-L12-v2]
+* https://huggingface.co/sentence-transformers/all-mpnet-base-v2[all-mpnet-base-v2]
+* https://huggingface.co/intfloat/e5-base-v2[e5-base-v2]
+* https://huggingface.co/intfloat/e5-small-v2[e5-small-v2]
+* https://huggingface.co/intfloat/multilingual-e5-base[multilingual-e5-base]
+* https://huggingface.co/intfloat/multilingual-e5-small[multilingual-e5-small]
\ No newline at end of file
diff --git a/docs/reference/inference/service-mistral.asciidoc b/docs/reference/inference/service-mistral.asciidoc
new file mode 100644
index 0000000000000..077e610191705
--- /dev/null
+++ b/docs/reference/inference/service-mistral.asciidoc
@@ -0,0 +1,99 @@
+[[infer-service-mistral]]
+=== Mistral {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `mistral` service.
+
+
+[discrete]
+[[infer-service-mistral-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-mistral-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `text_embedding`.
+--
+
+[discrete]
+[[infer-service-mistral-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string)
+The type of service supported for the specified task type. In this case,
+`mistral`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `mistral` service.
+--
+
+`api_key`:::
+(Required, string)
+A valid API key for your Mistral account.
+You can find your Mistral API keys or you can create a new one
+https://console.mistral.ai/api-keys/[on the API Keys page].
++
+--
+include::inference-shared.asciidoc[tag=api-key-admonition]
+--
+
+`model`:::
+(Required, string)
+The name of the model to use for the {infer} task.
+Refer to the https://docs.mistral.ai/getting-started/models/[Mistral models documentation]
+for the list of available text embedding models.
+
+`max_input_tokens`:::
+(Optional, integer)
+Allows you to specify the maximum number of tokens per input before chunking occurs.
+
+`rate_limit`:::
+(Optional, object)
+By default, the `mistral` service sets the number of requests allowed per minute to `240`.
+This helps to minimize the number of rate limit errors returned from the Mistral API.
+To modify this, set the `requests_per_minute` setting of this object in your service settings:
++
+--
+include::inference-shared.asciidoc[tag=request-per-minute-example]
+--
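+
+As an illustrative sketch (the endpoint name and token limit are hypothetical), `max_input_tokens` sits alongside the other service settings:
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/mistral-embeddings-chunked
+{
+ "service": "mistral",
+ "service_settings": {
+ "api_key": "<api_key>",
+ "model": "mistral-embed",
+ "max_input_tokens": 512
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]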
+
+
+[discrete]
+[[inference-example-mistral]]
+==== Mistral service example
+
+The following example shows how to create an {infer} endpoint called
+`mistral-embeddings-test` to perform a `text_embedding` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/mistral-embeddings-test
+{
+ "service": "mistral",
+ "service_settings": {
+ "api_key": "",
+ "model": "mistral-embed" <1>
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+<1> The `model` must be the ID of a text embedding model which can be found in the
+https://docs.mistral.ai/getting-started/models/[Mistral models documentation].
\ No newline at end of file
diff --git a/docs/reference/inference/service-openai.asciidoc b/docs/reference/inference/service-openai.asciidoc
new file mode 100644
index 0000000000000..075e76dc7d741
--- /dev/null
+++ b/docs/reference/inference/service-openai.asciidoc
@@ -0,0 +1,147 @@
+[[infer-service-openai]]
+=== OpenAI {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `openai` service.
+
+
+[discrete]
+[[infer-service-openai-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-openai-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `completion`,
+* `text_embedding`.
+--
+
+[discrete]
+[[infer-service-openai-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string)
+The type of service supported for the specified task type. In this case,
+`openai`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `openai` service.
+--
+
+`api_key`:::
+(Required, string)
+A valid API key of your OpenAI account.
+You can find your OpenAI API keys in your OpenAI account under the
+https://platform.openai.com/api-keys[API keys section].
++
+--
+include::inference-shared.asciidoc[tag=api-key-admonition]
+--
+
+`model_id`:::
+(Required, string)
+The name of the model to use for the {infer} task.
+Refer to the
+https://platform.openai.com/docs/guides/embeddings/what-are-embeddings[OpenAI documentation]
+for the list of available text embedding models.
+
+`organization_id`:::
+(Optional, string)
+The unique identifier of your organization.
+You can find the Organization ID in your OpenAI account under
+https://platform.openai.com/account/organization[**Settings** > **Organizations**].
+
+`url`:::
+(Optional, string)
+The URL endpoint to use for the requests.
+Can be changed for testing purposes.
+Defaults to `https://api.openai.com/v1/embeddings`.
+
+`rate_limit`:::
+(Optional, object)
+The `openai` service sets a default number of requests allowed per minute depending on the task type.
+For `text_embedding` it is set to `3000`.
+For `completion` it is set to `500`.
+This helps to minimize the number of rate limit errors returned from OpenAI.
+To modify this, set the `requests_per_minute` setting of this object in your service settings:
++
+--
+include::inference-shared.asciidoc[tag=request-per-minute-example]
+
+More information about the rate limits for OpenAI can be found in your https://platform.openai.com/account/limits[Account limits].
+--
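+
+As an illustrative sketch (the endpoint name, organization, and rate are hypothetical), `organization_id` and a custom rate limit are both set in the service settings:
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/openai-embeddings-scoped
+{
+ "service": "openai",
+ "service_settings": {
+ "api_key": "<api_key>",
+ "model_id": "text-embedding-ada-002",
+ "organization_id": "<organization_id>",
+ "rate_limit": {
+ "requests_per_minute": 100
+ }
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]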
+
+`task_settings`::
+(Optional, object)
+include::inference-shared.asciidoc[tag=task-settings]
++
+.`task_settings` for the `completion` task type
+[%collapsible%closed]
+=====
+`user`:::
+(Optional, string)
+Specifies the user issuing the request, which can be used for abuse detection.
+=====
++
+.`task_settings` for the `text_embedding` task type
+[%collapsible%closed]
+=====
+`user`:::
+(Optional, string)
+Specifies the user issuing the request, which can be used for abuse detection.
+=====
+
+
+[discrete]
+[[inference-example-openai]]
+==== OpenAI service example
+
+The following example shows how to create an {infer} endpoint called
+`openai-embeddings` to perform a `text_embedding` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/openai-embeddings
+{
+ "service": "openai",
+ "service_settings": {
+ "api_key": "",
+ "model_id": "text-embedding-ada-002"
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+The next example shows how to create an {infer} endpoint called
+`openai-completion` to perform a `completion` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/completion/openai-completion
+{
+ "service": "openai",
+ "service_settings": {
+ "api_key": "",
+ "model_id": "gpt-3.5-turbo"
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
\ No newline at end of file
diff --git a/docs/reference/ingest/apis/enrich/enrich-stats.asciidoc b/docs/reference/ingest/apis/enrich/enrich-stats.asciidoc
index ad1ca62e37bbf..85273ee584b9a 100644
--- a/docs/reference/ingest/apis/enrich/enrich-stats.asciidoc
+++ b/docs/reference/ingest/apis/enrich/enrich-stats.asciidoc
@@ -121,6 +121,10 @@ The amount of time in milliseconds spent fetching data from the cache on success
`misses_time_in_millis`::
(Long)
The amount of time in milliseconds spent fetching data from the enrich index and updating the cache, on cache misses only.
+
+`size_in_bytes`::
+(Long)
+An _approximation_ of the size in bytes that the enrich cache takes up on the heap.
--
[[enrich-stats-api-example]]
@@ -172,7 +176,8 @@ The API returns the following response:
"misses": 0,
"evictions": 0,
"hits_time_in_millis": 0,
- "misses_time_in_millis": 0
+ "misses_time_in_millis": 0,
+ "size_in_bytes": 0
}
]
}
@@ -187,3 +192,4 @@ The API returns the following response:
// TESTRESPONSE[s/"evictions": 0/"evictions" : $body.cache_stats.0.evictions/]
// TESTRESPONSE[s/"hits_time_in_millis": 0/"hits_time_in_millis" : $body.cache_stats.0.hits_time_in_millis/]
// TESTRESPONSE[s/"misses_time_in_millis": 0/"misses_time_in_millis" : $body.cache_stats.0.misses_time_in_millis/]
+// TESTRESPONSE[s/"size_in_bytes": 0/"size_in_bytes" : $body.cache_stats.0.size_in_bytes/]
diff --git a/docs/reference/ingest/processors/redact.asciidoc b/docs/reference/ingest/processors/redact.asciidoc
index 2004e48c2ed78..6706106e92655 100644
--- a/docs/reference/ingest/processors/redact.asciidoc
+++ b/docs/reference/ingest/processors/redact.asciidoc
@@ -4,8 +4,6 @@
Redact
++++
-experimental::[]
-
The Redact processor uses the Grok rules engine to obscure
text in the input document matching the given Grok patterns. The processor can
be used to obscure Personal Identifying Information (PII) by configuring it to
diff --git a/docs/reference/ingest/search-nlp-tutorial.asciidoc b/docs/reference/ingest/search-nlp-tutorial.asciidoc
index d5eacb6951023..afdceeeb8bac2 100644
--- a/docs/reference/ingest/search-nlp-tutorial.asciidoc
+++ b/docs/reference/ingest/search-nlp-tutorial.asciidoc
@@ -256,4 +256,3 @@ In this guide, we covered how to:
* {ml-docs}/ml-nlp-deploy-models.html[Deploying a model ML guide^]
* {ml-docs}/ml-nlp-import-model.html#ml-nlp-authentication[Eland Authentication methods^]
* <>
-// * <>
diff --git a/docs/reference/mapping/fields/source-field.asciidoc b/docs/reference/mapping/fields/source-field.asciidoc
index ec824e421e015..903b301ab1a96 100644
--- a/docs/reference/mapping/fields/source-field.asciidoc
+++ b/docs/reference/mapping/fields/source-field.asciidoc
@@ -6,11 +6,11 @@ at index time. The `_source` field itself is not indexed (and thus is not
searchable), but it is stored so that it can be returned when executing
_fetch_ requests, like <<docs-get,get>> or <<search-search,search>>.
-If disk usage is important to you then have a look at
-<> which shrinks disk usage at the cost of
-only supporting a subset of mappings and slower fetches or (not recommended)
-<> which also shrinks disk
-usage but disables many features.
+If disk usage is important to you, then consider the following options:
+
+- Using <<synthetic-source,synthetic `_source`>>, which reconstructs source content at the time of retrieval instead of storing it on disk. This shrinks disk usage, at the cost of slower access to `_source` in <<docs-get,Get>> and <<search-search,Search>> queries.
+- <<disable-source-field,Disabling the `_source` field>>. This shrinks disk
+usage but disables features that rely on `_source`.
include::synthetic-source.asciidoc[]
@@ -43,7 +43,7 @@ available then a number of features are not supported:
* The <<docs-update,`update`>>, <<docs-update-by-query,`update_by_query`>>,
and <<docs-reindex,`reindex`>> APIs.
-* In the {kib} link:{kibana-ref}/discover.html[Discover] application, field data will not be displayed.
+* In the {kib} link:{kibana-ref}/discover.html[Discover] application, field data will not be displayed.
* On the fly <>.
diff --git a/docs/reference/mapping/fields/synthetic-source.asciidoc b/docs/reference/mapping/fields/synthetic-source.asciidoc
index a0e7aed177a9c..ccea38cf602da 100644
--- a/docs/reference/mapping/fields/synthetic-source.asciidoc
+++ b/docs/reference/mapping/fields/synthetic-source.asciidoc
@@ -28,45 +28,22 @@ PUT idx
While this on the fly reconstruction is *generally* slower than saving the source
documents verbatim and loading them at query time, it saves a lot of storage
-space.
+space. Additional latency can be avoided by not loading the `_source` field in queries when it is not needed.
+
+[[synthetic-source-fields]]
+===== Supported fields
+Synthetic `_source` is supported by all field types. Depending on implementation details, field types have different properties when used with synthetic `_source`.
+
+Most field types construct synthetic `_source` using existing data, most commonly <<doc-values,`doc_values`>> and <<stored-fields,stored fields>>. For these field types, no additional space is needed to store the contents of the `_source` field. Due to the storage layout of <<doc-values,`doc_values`>>, the generated `_source` field undergoes <<synthetic-source-modifications,modifications>> compared to the original document.
+
+For all other field types, the original value of the field is stored as is, in the same way as the `_source` field in non-synthetic mode. In this case, there are no modifications, and the field data in `_source` is the same as in the original document. Similarly, malformed values of fields that use <<ignore-malformed,`ignore_malformed`>> or <<ignore-above,`ignore_above`>> need to be stored as is. This approach is less storage efficient, since the data needed for `_source` reconstruction is stored in addition to the other data required to index the field (like `doc_values`).
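+
+As a minimal sketch (the index and field names are illustrative), a mapping that enables synthetic `_source` and relies on `doc_values` for reconstruction looks like this:
+
+[source,console]
+----
+PUT idx-synthetic
+{
+  "mappings": {
+    "_source": {
+      "mode": "synthetic"
+    },
+    "properties": {
+      "tag": { "type": "keyword" }
+    }
+  }
+}
+----
+// TEST[skip:TBD]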
[[synthetic-source-restrictions]]
===== Synthetic `_source` restrictions
-There are a couple of restrictions to be aware of:
+Synthetic `_source` cannot be used together with field mappings that use <<copy-to,`copy_to`>>.
-* When you retrieve synthetic `_source` content it undergoes minor
-<> compared to the original JSON.
-* Synthetic `_source` can be used with indices that contain only these field
-types:
-
-** <>
-** {plugins}/mapper-annotated-text-usage.html#annotated-text-synthetic-source[`annotated-text`]
-** <>
-** <>
-** <>
-** <>
-** <>
-** <>
-** <>
-** <