From 774f1a455053a3fa543059a56af9621f461f14c2 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 17 Jul 2024 06:49:35 +1000 Subject: [PATCH 01/65] Mute org.elasticsearch.nativeaccess.PreallocateTests testPreallocate #110948 --- muted-tests.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index af31afa66a53f..304d40b12d6ca 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -4,8 +4,7 @@ tests: method: "testGuessIsDayFirstFromLocale" - class: "org.elasticsearch.test.rest.ClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/108857" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ - \ dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" - class: "org.elasticsearch.upgrades.SearchStatesIT" issue: "https://github.com/elastic/elasticsearch/issues/108991" method: "testCanMatch" @@ -14,8 +13,7 @@ tests: method: "testTrainedModelInference" - class: "org.elasticsearch.xpack.security.CoreWithSecurityClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109188" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ - \ dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" - class: "org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT" issue: "https://github.com/elastic/elasticsearch/issues/109189" method: "test {p0=esql/70_locale/Date format with Italian locale}" @@ -30,8 +28,7 @@ tests: method: "testTimestampFieldTypeExposedByAllIndicesServices" - class: "org.elasticsearch.analysis.common.CommonAnalysisClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109318" - method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling\ - \ (too complex pattern)}" + method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling (too complex pattern)}" - class: "org.elasticsearch.xpack.ml.integration.ClassificationHousePricingIT" issue: "https://github.com/elastic/elasticsearch/issues/101598" method: "testFeatureImportanceValues" @@ -93,6 +90,9 @@ tests: - class: "org.elasticsearch.xpack.watcher.test.integration.HistoryIntegrationTests" issue: "https://github.com/elastic/elasticsearch/issues/110885" method: "testPayloadInputWithDotsInFieldNameWorks" +- class: org.elasticsearch.nativeaccess.PreallocateTests + method: testPreallocate + issue: https://github.com/elastic/elasticsearch/issues/110948 # Examples: # From 058b7bbb6d72919588b9cd31ed9a45b29e8672e8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 17 Jul 2024 06:49:41 +1000 Subject: [PATCH 02/65] Mute org.elasticsearch.nativeaccess.VectorSystemPropertyTests testSystemPropertyDisabled #110949 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 304d40b12d6ca..f78be99f9affc 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -93,6 +93,9 @@ tests: - class: org.elasticsearch.nativeaccess.PreallocateTests method: testPreallocate issue: https://github.com/elastic/elasticsearch/issues/110948 +- class: org.elasticsearch.nativeaccess.VectorSystemPropertyTests + method: testSystemPropertyDisabled + issue: 
https://github.com/elastic/elasticsearch/issues/110949 # Examples: # From d990664300bd8aaf8647b739dc90bf30b507cac7 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 17 Jul 2024 06:50:23 +1000 Subject: [PATCH 03/65] Mute org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT test {stats.Count_or_null SYNC #2} #110950 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index f78be99f9affc..fe6625de3b030 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -96,6 +96,9 @@ tests: - class: org.elasticsearch.nativeaccess.VectorSystemPropertyTests method: testSystemPropertyDisabled issue: https://github.com/elastic/elasticsearch/issues/110949 +- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT + method: "test {stats.Count_or_null SYNC #2}" + issue: https://github.com/elastic/elasticsearch/issues/110950 # Examples: # From 99986d68e4ba5768b9963af85ff7348b27ebe73d Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 16 Jul 2024 23:59:23 +0200 Subject: [PATCH 04/65] Optimize primitive writes in RecyclerBytesStreamOutput (#110342) We can inline the capacity check in these 2 spots as well, like we have for writing `long` etc. This gives a visible speedup in profiling benchmarks (we do quite a bit of single byte writing unfortunately). --- .../io/stream/RecyclerBytesStreamOutput.java | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/RecyclerBytesStreamOutput.java b/server/src/main/java/org/elasticsearch/common/io/stream/RecyclerBytesStreamOutput.java index c4857a8b85ea3..b83ebc6a8c64f 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/RecyclerBytesStreamOutput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/RecyclerBytesStreamOutput.java @@ -56,10 +56,14 @@ public long position() { @Override public void writeByte(byte b) { - ensureCapacity(1); + int currentPageOffset = this.currentPageOffset; + if (1 > (pageSize - currentPageOffset)) { + ensureCapacity(1); + currentPageOffset = 0; + } BytesRef currentPage = pages.get(pageIndex).v(); currentPage.bytes[currentPage.offset + currentPageOffset] = b; - currentPageOffset++; + this.currentPageOffset = currentPageOffset + 1; } @Override @@ -72,7 +76,12 @@ public void writeBytes(byte[] b, int offset, int length) { Objects.checkFromIndexSize(offset, length, b.length); // get enough pages for new size - ensureCapacity(length); + final int pageSize = this.pageSize; + int currentPageOffset = this.currentPageOffset; + if (length > pageSize - currentPageOffset) { + ensureCapacity(length); + currentPageOffset = this.currentPageOffset; + } // bulk copy int bytesToCopy = length; @@ -92,6 +101,7 @@ public void writeBytes(byte[] b, int offset, int length) { } j++; } + this.currentPageOffset = currentPageOffset; // advance pageIndex += j; @@ -99,12 +109,13 @@ public void writeBytes(byte[] b, int offset, int length) { @Override public void writeInt(int i) throws IOException { + final int currentPageOffset = this.currentPageOffset; if (4 > (pageSize - currentPageOffset)) { super.writeInt(i); } else { BytesRef currentPage = pages.get(pageIndex).v(); VH_BE_INT.set(currentPage.bytes, currentPage.offset + currentPageOffset, i); - currentPageOffset += 4; + this.currentPageOffset = currentPageOffset + 4; } } @@ -121,12 +132,13 @@ public void writeIntLE(int i) throws IOException { @Override public void 
writeLong(long i) throws IOException { + final int currentPageOffset = this.currentPageOffset; if (8 > (pageSize - currentPageOffset)) { super.writeLong(i); } else { BytesRef currentPage = pages.get(pageIndex).v(); VH_BE_LONG.set(currentPage.bytes, currentPage.offset + currentPageOffset, i); - currentPageOffset += 8; + this.currentPageOffset = currentPageOffset + 8; } } @@ -242,9 +254,8 @@ public BytesReference bytes() { } private void ensureCapacity(int bytesNeeded) { - if (bytesNeeded > pageSize - currentPageOffset) { - ensureCapacityFromPosition(position() + bytesNeeded); - } + assert bytesNeeded > pageSize - currentPageOffset; + ensureCapacityFromPosition(position() + bytesNeeded); } private void ensureCapacityFromPosition(long newPosition) { From 8968b47f25b56d95ad15022adb94d79aa251db65 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 17 Jul 2024 05:46:11 +0100 Subject: [PATCH 05/65] Drop intermediate ID set in `TransportGetSnapshotsAction` (#110868) Today we build an `Iterator` to compute the snapshot IDs whose `SnapshotInfo` should be returned. This iterator first looks at ongoing snapshots and then the completed ones. When consuming items from this iterator we check again whether the snapshot ID corresponds to an ongoing snapshot and produce the corresponding `SnapshotInfo` differently in that case. This forces us to create a potentially-massive intermediate set of all the `SnapshotId` values we need to look up. There's no need for these two phases, we can instead build the two different ways of creating a `SnapshotInfo` into the iterator itself, saving the large intermediate set of IDs and simplifying the implementation. --- .../get/TransportGetSnapshotsAction.java | 150 +++++++++++------- 1 file changed, 89 insertions(+), 61 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java index a7352f7f4e3a2..25ae73282d7b6 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java @@ -284,13 +284,13 @@ private void loadSnapshotInfos(String repositoryName, @Nullable RepositoryData r ensureRequiredNamesPresent(repositoryName, repositoryData); if (verbose) { - loadSnapshotInfos(repository, getSnapshotIdIterator(repositoryName, repositoryData), listener); + loadSnapshotInfos(getAsyncSnapshotInfoIterator(repository, repositoryData), listener); } else { assert fromSortValuePredicates.isMatchAll() : "filtering is not supported in non-verbose mode"; assert slmPolicyPredicate == SlmPolicyPredicate.MATCH_ALL_POLICIES : "filtering is not supported in non-verbose mode"; addSimpleSnapshotInfos( - getSnapshotIdIterator(repositoryName, repositoryData), + getAsyncSnapshotInfoIterator(repository, repositoryData), repositoryName, repositoryData, snapshotsInProgress.forRepo(repositoryName).stream().map(entry -> SnapshotInfo.inProgress(entry).basic()).toList() @@ -328,88 +328,119 @@ private void ensureRequiredNamesPresent(String repositoryName, @Nullable Reposit } /** - * @return an iterator over the snapshot IDs in the given repository which match {@link #snapshotNamePredicate}. + * An asynchronous supplier of a {@link SnapshotInfo}. 
*/ - private Iterator getSnapshotIdIterator(String repositoryName, @Nullable RepositoryData repositoryData) { + private interface AsyncSnapshotInfo { - // now iterate through the snapshots again, returning matching IDs (or null) + /** + * @return the {@link SnapshotId} of the {@link SnapshotInfo} to be retrieved. + */ + SnapshotId getSnapshotId(); + + /** + * @param listener completed, possibly asynchronously, with the appropriate {@link SnapshotInfo}. + */ + void getSnapshotInfo(ActionListener listener); + } + + /** + * @return an {@link AsyncSnapshotInfo} for the given in-progress snapshot entry. + */ + private static AsyncSnapshotInfo forSnapshotInProgress(SnapshotsInProgress.Entry snapshotInProgress) { + return new AsyncSnapshotInfo() { + @Override + public SnapshotId getSnapshotId() { + return snapshotInProgress.snapshot().getSnapshotId(); + } + + @Override + public void getSnapshotInfo(ActionListener listener) { + listener.onResponse(SnapshotInfo.inProgress(snapshotInProgress)); + } + + @Override + public String toString() { + return snapshotInProgress.snapshot().toString(); + } + }; + } + + /** + * @return an {@link AsyncSnapshotInfo} for the given completed snapshot. + */ + private AsyncSnapshotInfo forCompletedSnapshot(Repository repository, SnapshotId snapshotId) { + return new AsyncSnapshotInfo() { + @Override + public SnapshotId getSnapshotId() { + return snapshotId; + } + + @Override + public void getSnapshotInfo(ActionListener listener) { + getSnapshotInfoExecutor.getSnapshotInfo(repository, snapshotId, listener); + } + + @Override + public String toString() { + return repository.getMetadata().name() + ":" + snapshotId; + } + }; + } + + /** + * @return an iterator of {@link AsyncSnapshotInfo} instances in the given repository which match {@link #snapshotNamePredicate}. + */ + private Iterator getAsyncSnapshotInfoIterator(Repository repository, @Nullable RepositoryData repositoryData) { + // now iterate through the snapshots again, returning SnapshotInfo suppliers for ones with matching IDs final Set matchingInProgressSnapshots = new HashSet<>(); return Iterators.concat( // matching in-progress snapshots first - Iterators.filter( - Iterators.map( - snapshotsInProgress.forRepo(repositoryName).iterator(), - snapshotInProgress -> snapshotInProgress.snapshot().getSnapshotId() - ), - snapshotId -> { + Iterators.map( + Iterators.filter(snapshotsInProgress.forRepo(repository.getMetadata().name()).iterator(), snapshotInProgress -> { + final var snapshotId = snapshotInProgress.snapshot().getSnapshotId(); if (snapshotNamePredicate.test(snapshotId.getName(), true)) { matchingInProgressSnapshots.add(snapshotId); return true; } else { return false; } - } + }), + GetSnapshotsOperation::forSnapshotInProgress ), repositoryData == null // only returning in-progress snapshots ? 
Collections.emptyIterator() // also return matching completed snapshots (except any ones that were also found to be in-progress) - : Iterators.filter( - repositoryData.getSnapshotIds().iterator(), - snapshotId -> matchingInProgressSnapshots.contains(snapshotId) == false - && snapshotNamePredicate.test(snapshotId.getName(), false) - && matchesPredicates(snapshotId, repositoryData) + : Iterators.map( + Iterators.filter( + repositoryData.getSnapshotIds().iterator(), + snapshotId -> matchingInProgressSnapshots.contains(snapshotId) == false + && snapshotNamePredicate.test(snapshotId.getName(), false) + && matchesPredicates(snapshotId, repositoryData) + ), + snapshotId -> forCompletedSnapshot(repository, snapshotId) ) ); } - private void loadSnapshotInfos(Repository repository, Iterator snapshotIdIterator, ActionListener listener) { + private void loadSnapshotInfos(Iterator asyncSnapshotInfoIterator, ActionListener listener) { if (cancellableTask.notifyIfCancelled(listener)) { return; } - final var repositoryName = repository.getMetadata().name(); - final AtomicInteger repositoryTotalCount = new AtomicInteger(); - final Set snapshotIdsToIterate = new HashSet<>(); - snapshotIdIterator.forEachRemaining(snapshotIdsToIterate::add); - - final List snapshots = new ArrayList<>(snapshotIdsToIterate.size()); - // first, look at the snapshots in progress - final List entries = SnapshotsService.currentSnapshots( - snapshotsInProgress, - repositoryName, - snapshotIdsToIterate.stream().map(SnapshotId::getName).toList() - ); - for (SnapshotsInProgress.Entry entry : entries) { - if (snapshotIdsToIterate.remove(entry.snapshot().getSnapshotId())) { - final SnapshotInfo snapshotInfo = SnapshotInfo.inProgress(entry); - if (matchesPredicates(snapshotInfo)) { - repositoryTotalCount.incrementAndGet(); - if (afterPredicate.test(snapshotInfo)) { - snapshots.add(snapshotInfo.maybeWithoutIndices(indices)); - } - } - } - } - // then, look in the repository if there's any matching snapshots left + final var repositoryTotalCount = new AtomicInteger(); + + final List snapshots = new ArrayList<>(); + final List syncSnapshots = Collections.synchronizedList(snapshots); + SubscribableListener .newForked(l -> { try (var listeners = new RefCountingListener(l)) { - if (snapshotIdsToIterate.isEmpty()) { - return; - } - - // only need to synchronize accesses related to reading SnapshotInfo from the repo - final List syncSnapshots = Collections.synchronizedList(snapshots); - ThrottledIterator.run( - Iterators.failFast( - snapshotIdsToIterate.iterator(), - () -> cancellableTask.isCancelled() || listeners.isFailing() - ), - (ref, snapshotId) -> { + Iterators.failFast(asyncSnapshotInfoIterator, () -> cancellableTask.isCancelled() || listeners.isFailing()), + (ref, asyncSnapshotInfo) -> { final var refListener = ActionListener.runBefore(listeners.acquire(), ref::close); - getSnapshotInfoExecutor.getSnapshotInfo(repository, snapshotId, new ActionListener<>() { + asyncSnapshotInfo.getSnapshotInfo(new ActionListener<>() { @Override public void onResponse(SnapshotInfo snapshotInfo) { if (matchesPredicates(snapshotInfo)) { @@ -424,10 +455,7 @@ public void onResponse(SnapshotInfo snapshotInfo) { @Override public void onFailure(Exception e) { if (ignoreUnavailable) { - logger.warn( - Strings.format("failed to fetch snapshot info for [%s:%s]", repository, snapshotId), - e - ); + logger.warn(Strings.format("failed to fetch snapshot info for [%s]", asyncSnapshotInfo), e); refListener.onResponse(null); } else { refListener.onFailure(e); @@ -455,7 
+483,7 @@ private void addResults(int repositoryTotalCount, List snapshots) } private void addSimpleSnapshotInfos( - final Iterator snapshotIdIterator, + final Iterator snapshotIdIterator, final String repositoryName, @Nullable final RepositoryData repositoryData, final List currentSnapshots @@ -467,7 +495,7 @@ private void addSimpleSnapshotInfos( } // else want non-current snapshots as well, which are found in the repository data final Set toResolve = new HashSet<>(); - snapshotIdIterator.forEachRemaining(toResolve::add); + snapshotIdIterator.forEachRemaining(item -> toResolve.add(item.getSnapshotId())); List snapshotInfos = new ArrayList<>(currentSnapshots.size() + toResolve.size()); int repositoryTotalCount = 0; From ef61d3c78b72f5e6e4f426fb9d7045b91455ba1f Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 17 Jul 2024 07:22:03 +0100 Subject: [PATCH 06/65] Clean up non-verbose get-snapshots impl (#110957) Rather than having a completely separate path for `verbose == false`, we can combine the two and just distinguish them by how they produce the `SnapshotInfo` instances in the iterator. This saves allocating a potentially-massive set of all the IDs during the computation. --- .../get/TransportGetSnapshotsAction.java | 123 +++++++----------- 1 file changed, 49 insertions(+), 74 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java index 25ae73282d7b6..84f98e36f2a37 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java @@ -232,6 +232,11 @@ private class GetSnapshotsOperation { threadPool.info(ThreadPool.Names.SNAPSHOT_META).getMax(), cancellableTask::isCancelled ); + + if (verbose == false) { + assert fromSortValuePredicates.isMatchAll() : "filtering is not supported in non-verbose mode"; + assert slmPolicyPredicate == SlmPolicyPredicate.MATCH_ALL_POLICIES : "filtering is not supported in non-verbose mode"; + } } void getMultipleReposSnapshotInfo(ActionListener listener) { @@ -278,25 +283,10 @@ private boolean skipRepository(String repositoryName) { private void loadSnapshotInfos(String repositoryName, @Nullable RepositoryData repositoryData, ActionListener listener) { assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); - cancellableTask.ensureNotCancelled(); final var repository = repositoriesService.repository(repositoryName); ensureRequiredNamesPresent(repositoryName, repositoryData); - - if (verbose) { - loadSnapshotInfos(getAsyncSnapshotInfoIterator(repository, repositoryData), listener); - } else { - assert fromSortValuePredicates.isMatchAll() : "filtering is not supported in non-verbose mode"; - assert slmPolicyPredicate == SlmPolicyPredicate.MATCH_ALL_POLICIES : "filtering is not supported in non-verbose mode"; - - addSimpleSnapshotInfos( - getAsyncSnapshotInfoIterator(repository, repositoryData), - repositoryName, - repositoryData, - snapshotsInProgress.forRepo(repositoryName).stream().map(entry -> SnapshotInfo.inProgress(entry).basic()).toList() - ); - listener.onResponse(null); - } + loadSnapshotInfos(getAsyncSnapshotInfoIterator(repository, repositoryData), listener); } /** @@ -346,7 +336,7 @@ private interface AsyncSnapshotInfo { /** * @return an {@link AsyncSnapshotInfo} for the given 
in-progress snapshot entry. */ - private static AsyncSnapshotInfo forSnapshotInProgress(SnapshotsInProgress.Entry snapshotInProgress) { + private AsyncSnapshotInfo forSnapshotInProgress(SnapshotsInProgress.Entry snapshotInProgress) { return new AsyncSnapshotInfo() { @Override public SnapshotId getSnapshotId() { @@ -355,7 +345,8 @@ public SnapshotId getSnapshotId() { @Override public void getSnapshotInfo(ActionListener listener) { - listener.onResponse(SnapshotInfo.inProgress(snapshotInProgress)); + final var snapshotInfo = SnapshotInfo.inProgress(snapshotInProgress); + listener.onResponse(verbose ? snapshotInfo : snapshotInfo.basic()); } @Override @@ -368,7 +359,12 @@ public String toString() { /** * @return an {@link AsyncSnapshotInfo} for the given completed snapshot. */ - private AsyncSnapshotInfo forCompletedSnapshot(Repository repository, SnapshotId snapshotId) { + private AsyncSnapshotInfo forCompletedSnapshot( + Repository repository, + SnapshotId snapshotId, + RepositoryData repositoryData, + Map> indicesLookup + ) { return new AsyncSnapshotInfo() { @Override public SnapshotId getSnapshotId() { @@ -377,7 +373,20 @@ public SnapshotId getSnapshotId() { @Override public void getSnapshotInfo(ActionListener listener) { - getSnapshotInfoExecutor.getSnapshotInfo(repository, snapshotId, listener); + if (verbose) { + getSnapshotInfoExecutor.getSnapshotInfo(repository, snapshotId, listener); + } else { + ActionListener.completeWith( + listener, + () -> new SnapshotInfo( + new Snapshot(repository.getMetadata().name(), snapshotId), + indicesLookup.getOrDefault(snapshotId, Collections.emptyList()), + Collections.emptyList(), + Collections.emptyList(), + repositoryData.getSnapshotState(snapshotId) + ) + ); + } } @Override @@ -393,6 +402,7 @@ public String toString() { private Iterator getAsyncSnapshotInfoIterator(Repository repository, @Nullable RepositoryData repositoryData) { // now iterate through the snapshots again, returning SnapshotInfo suppliers for ones with matching IDs final Set matchingInProgressSnapshots = new HashSet<>(); + final var indicesLookup = getIndicesLookup(repositoryData); return Iterators.concat( // matching in-progress snapshots first Iterators.map( @@ -405,7 +415,7 @@ private Iterator getAsyncSnapshotInfoIterator(Repository repo return false; } }), - GetSnapshotsOperation::forSnapshotInProgress + this::forSnapshotInProgress ), repositoryData == null // only returning in-progress snapshots @@ -418,11 +428,28 @@ private Iterator getAsyncSnapshotInfoIterator(Repository repo && snapshotNamePredicate.test(snapshotId.getName(), false) && matchesPredicates(snapshotId, repositoryData) ), - snapshotId -> forCompletedSnapshot(repository, snapshotId) + snapshotId -> forCompletedSnapshot(repository, snapshotId, repositoryData, indicesLookup) ) ); } + @Nullable + private Map> getIndicesLookup(RepositoryData repositoryData) { + if (repositoryData == null || verbose || indices == false) { + return Map.of(); + } + + final Map> snapshotsToIndices = new HashMap<>(); + for (IndexId indexId : repositoryData.getIndices().values()) { + for (SnapshotId snapshotId : repositoryData.getSnapshots(indexId)) { + if (snapshotNamePredicate.test(snapshotId.getName(), false) && matchesPredicates(snapshotId, repositoryData)) { + snapshotsToIndices.computeIfAbsent(snapshotId, (k) -> new ArrayList<>()).add(indexId.getName()); + } + } + } + return snapshotsToIndices; + } + private void loadSnapshotInfos(Iterator asyncSnapshotInfoIterator, ActionListener listener) { if 
(cancellableTask.notifyIfCancelled(listener)) { return; @@ -482,58 +509,6 @@ private void addResults(int repositoryTotalCount, List snapshots) allSnapshotInfos.add(snapshots); } - private void addSimpleSnapshotInfos( - final Iterator snapshotIdIterator, - final String repositoryName, - @Nullable final RepositoryData repositoryData, - final List currentSnapshots - ) { - if (repositoryData == null) { - // only want current snapshots - addResults(currentSnapshots.size(), currentSnapshots.stream().filter(afterPredicate).toList()); - return; - } // else want non-current snapshots as well, which are found in the repository data - - final Set toResolve = new HashSet<>(); - snapshotIdIterator.forEachRemaining(item -> toResolve.add(item.getSnapshotId())); - - List snapshotInfos = new ArrayList<>(currentSnapshots.size() + toResolve.size()); - int repositoryTotalCount = 0; - for (SnapshotInfo snapshotInfo : currentSnapshots) { - assert snapshotInfo.startTime() == 0L && snapshotInfo.endTime() == 0L && snapshotInfo.totalShards() == 0L : snapshotInfo; - if (toResolve.remove(snapshotInfo.snapshot().getSnapshotId())) { - repositoryTotalCount += 1; - if (afterPredicate.test(snapshotInfo)) { - snapshotInfos.add(snapshotInfo); - } - } - } - Map> snapshotsToIndices = new HashMap<>(); - if (indices) { - for (IndexId indexId : repositoryData.getIndices().values()) { - for (SnapshotId snapshotId : repositoryData.getSnapshots(indexId)) { - if (toResolve.contains(snapshotId)) { - snapshotsToIndices.computeIfAbsent(snapshotId, (k) -> new ArrayList<>()).add(indexId.getName()); - } - } - } - } - for (SnapshotId snapshotId : toResolve) { - final var snapshotInfo = new SnapshotInfo( - new Snapshot(repositoryName, snapshotId), - snapshotsToIndices.getOrDefault(snapshotId, Collections.emptyList()), - Collections.emptyList(), - Collections.emptyList(), - repositoryData.getSnapshotState(snapshotId) - ); - repositoryTotalCount += 1; - if (afterPredicate.test(snapshotInfo)) { - snapshotInfos.add(snapshotInfo); - } - } - addResults(repositoryTotalCount, snapshotInfos); - } - private GetSnapshotsResponse buildResponse() { assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); cancellableTask.ensureNotCancelled(); From 5766491ad4731a4b556804126635fd5b6b6bea3f Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 17 Jul 2024 07:38:54 +0100 Subject: [PATCH 07/65] Remove `PlainActionFuture#get` utility (#110955) This utility is almost entirely used in tests where an infinite wait is inappropriate, and the exception-mangling behaviour is unhelpful, so this commit replaces it with safer alternatives. There should be no need for an easy way to block production threads forever. 
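For illustration, a minimal before/after sketch of the kind of mechanical replacement made in the tests below, taken from a call site that appears in these diffs (StoreHeartbeatServiceTests); `safeAwait` here is the existing test-framework helper that accepts an `ActionListener` consumer, so this is a sketch of the pattern rather than a complete listing:

    // Before (removed utility): hand a PlainActionFuture to the async API,
    // then block on it with no timeout, rewrapping exceptions on the way out.
    Heartbeat heartbeat = PlainActionFuture.get(heartbeatStore::readLatestHeartbeat);

    // After: express the same async call through the test helper, which waits
    // with a bound instead of indefinitely.
    Heartbeat heartbeat = safeAwait(heartbeatStore::readLatestHeartbeat);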
--- ...eCloudStorageBlobStoreRepositoryTests.java | 19 +-- .../s3/S3BlobStoreRepositoryTests.java | 27 +--- .../netty4/SimpleNetty4TransportTests.java | 22 +-- .../readiness/ReadinessClusterIT.java | 39 +++-- .../snapshots/CloneSnapshotIT.java | 58 +++---- .../CorruptedBlobStoreRepositoryIT.java | 25 +--- .../action/support/PlainActionFuture.java | 7 - .../support/UnsafePlainActionFuture.java | 7 - .../blobstore/BlobStoreRepository.java | 4 +- .../support/ThreadedActionListenerTests.java | 8 +- .../ParentTaskAssigningClientTests.java | 8 +- .../coordination/NodeLeftExecutorTests.java | 14 +- .../AtomicRegisterPreVoteCollectorTests.java | 7 +- .../stateless/StoreHeartbeatServiceTests.java | 20 +-- .../blobstore/fs/FsBlobContainerTests.java | 13 +- .../common/component/LifecycleTests.java | 6 +- .../http/DefaultRestChannelTests.java | 3 +- .../IndexShardOperationPermitsTests.java | 18 +-- .../index/shard/IndexShardTests.java | 54 +++++-- .../BlobStoreRepositoryRestoreTests.java | 7 +- .../blobstore/BlobStoreRepositoryTests.java | 30 +++- .../ClusterConnectionManagerTests.java | 8 +- .../transport/RemoteClusterClientTests.java | 38 ++--- .../RemoteClusterConnectionTests.java | 2 +- .../TransportServiceHandshakeTests.java | 141 +++++++++--------- .../action/support/TestPlainActionFuture.java | 19 +++ .../ESBlobStoreRepositoryIntegTestCase.java | 11 +- .../AbstractSnapshotIntegTestCase.java | 26 +--- .../org/elasticsearch/test/ESTestCase.java | 4 +- .../AbstractSimpleTransportTestCase.java | 61 +++++--- .../DisruptableMockTransportTests.java | 12 +- .../shared/SharedBlobCacheServiceTests.java | 22 ++- .../ShardFollowTaskReplicationTests.java | 6 +- .../SourceOnlySnapshotShardTests.java | 12 +- ...pleSecurityNetty4ServerTransportTests.java | 64 ++++---- 35 files changed, 400 insertions(+), 422 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/action/support/TestPlainActionFuture.java diff --git a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java index 6d2c015d7d922..e6625ea1b7f15 100644 --- a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java +++ b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java @@ -22,8 +22,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; -import org.elasticsearch.action.ActionRunnable; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.blobstore.BlobContainer; @@ -121,22 +119,13 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { return settings.build(); } - public void testDeleteSingleItem() { + public void testDeleteSingleItem() throws IOException { final String repoName = createRepository(randomRepositoryName()); final RepositoriesService repositoriesService = internalCluster().getAnyMasterNodeInstance(RepositoriesService.class); final BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(repoName); - PlainActionFuture.get( - f -> repository.threadPool() - .generic() - .execute( - ActionRunnable.run( - 
f, - () -> repository.blobStore() - .blobContainer(repository.basePath()) - .deleteBlobsIgnoringIfNotExists(randomPurpose(), Iterators.single("foo")) - ) - ) - ); + repository.blobStore() + .blobContainer(repository.basePath()) + .deleteBlobsIgnoringIfNotExists(randomPurpose(), Iterators.single("foo")); } public void testChunkSize() { diff --git a/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3BlobStoreRepositoryTests.java b/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3BlobStoreRepositoryTests.java index c97e26651d4ee..1132111826563 100644 --- a/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3BlobStoreRepositoryTests.java +++ b/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3BlobStoreRepositoryTests.java @@ -14,8 +14,6 @@ import com.sun.net.httpserver.HttpExchange; import com.sun.net.httpserver.HttpHandler; -import org.elasticsearch.action.ActionRunnable; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.broadcast.BroadcastResponse; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.service.ClusterService; @@ -418,23 +416,14 @@ public void testEnforcedCooldownPeriod() throws IOException { final BytesReference serialized = BytesReference.bytes( modifiedRepositoryData.snapshotsToXContent(XContentFactory.jsonBuilder(), SnapshotsService.OLD_SNAPSHOT_FORMAT) ); - PlainActionFuture.get( - f -> repository.threadPool() - .generic() - .execute( - ActionRunnable.run( - f, - () -> repository.blobStore() - .blobContainer(repository.basePath()) - .writeBlobAtomic( - randomNonDataPurpose(), - BlobStoreRepository.INDEX_FILE_PREFIX + modifiedRepositoryData.getGenId(), - serialized, - true - ) - ) - ) - ); + repository.blobStore() + .blobContainer(repository.basePath()) + .writeBlobAtomic( + randomNonDataPurpose(), + BlobStoreRepository.INDEX_FILE_PREFIX + modifiedRepositoryData.getGenId(), + serialized, + true + ); final String newSnapshotName = "snapshot-new"; final long beforeThrottledSnapshot = repository.threadPool().relativeTimeInNanos(); diff --git a/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/SimpleNetty4TransportTests.java b/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/SimpleNetty4TransportTests.java index 6eaddf51c02b4..cedb68b25a4bf 100644 --- a/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/SimpleNetty4TransportTests.java +++ b/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/SimpleNetty4TransportTests.java @@ -86,16 +86,13 @@ public void executeHandshake( } public void testConnectException() throws UnknownHostException { - try { - connectToNode( - serviceA, - DiscoveryNodeUtils.create("C", new TransportAddress(InetAddress.getByName("localhost"), 9876), emptyMap(), emptySet()) - ); - fail("Expected ConnectTransportException"); - } catch (ConnectTransportException e) { - assertThat(e.getMessage(), containsString("connect_exception")); - assertThat(e.getMessage(), containsString("[127.0.0.1:9876]")); - } + final var e = connectToNodeExpectFailure( + serviceA, + DiscoveryNodeUtils.create("C", new TransportAddress(InetAddress.getByName("localhost"), 9876), emptyMap(), emptySet()), + null + ); + assertThat(e.getMessage(), containsString("connect_exception")); + assertThat(e.getMessage(), containsString("[127.0.0.1:9876]")); } public void 
testDefaultKeepAliveSettings() throws IOException { @@ -236,10 +233,7 @@ public void testTimeoutPerConnection() throws IOException { final ConnectionProfile profile = builder.build(); // now with the 1ms timeout we got and test that is it's applied long startTime = System.nanoTime(); - ConnectTransportException ex = expectThrows( - ConnectTransportException.class, - () -> openConnection(service, second, profile) - ); + ConnectTransportException ex = openConnectionExpectFailure(service, second, profile); final long now = System.nanoTime(); final long timeTaken = TimeValue.nsecToMSec(now - startTime); assertTrue( diff --git a/server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java b/server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java index 5b44a949ab784..8335b3c0c4249 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java @@ -7,7 +7,6 @@ */ package org.elasticsearch.readiness; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.client.internal.Client; import org.elasticsearch.cluster.ClusterChangedEvent; import org.elasticsearch.cluster.ClusterState; @@ -397,24 +396,24 @@ public void testReadyAfterCorrectFileSettings() throws Exception { } private void causeClusterStateUpdate() { - PlainActionFuture.get( - fut -> internalCluster().getCurrentMasterNodeInstance(ClusterService.class) - .submitUnbatchedStateUpdateTask("poke", new ClusterStateUpdateTask() { - @Override - public ClusterState execute(ClusterState currentState) { - return ClusterState.builder(currentState).build(); - } - - @Override - public void onFailure(Exception e) { - assert false : e; - } - - @Override - public void clusterStateProcessed(ClusterState initialState, ClusterState newState) { - fut.onResponse(null); - } - }) - ); + final var latch = new CountDownLatch(1); + internalCluster().getCurrentMasterNodeInstance(ClusterService.class) + .submitUnbatchedStateUpdateTask("poke", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + return ClusterState.builder(currentState).build(); + } + + @Override + public void onFailure(Exception e) { + assert false : e; + } + + @Override + public void clusterStateProcessed(ClusterState initialState, ClusterState newState) { + latch.countDown(); + } + }); + safeAwait(latch); } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index a16a19f66085b..a035abb81d7e6 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -8,11 +8,9 @@ package org.elasticsearch.snapshots; import org.elasticsearch.action.ActionFuture; -import org.elasticsearch.action.ActionRunnable; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.internal.Client; import org.elasticsearch.cluster.SnapshotsInProgress; @@ 
-33,6 +31,7 @@ import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xcontent.NamedXContentRegistry; +import java.io.IOException; import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; @@ -78,8 +77,14 @@ public void testShardClone() throws Exception { } else { currentShardGen = repositoryData.shardGenerations().getShardGen(indexId, shardId); } - final ShardSnapshotResult shardSnapshotResult = PlainActionFuture.get( - f -> repository.cloneShardSnapshot(sourceSnapshotInfo.snapshotId(), targetSnapshotId, repositoryShardId, currentShardGen, f) + final ShardSnapshotResult shardSnapshotResult = safeAwait( + listener -> repository.cloneShardSnapshot( + sourceSnapshotInfo.snapshotId(), + targetSnapshotId, + repositoryShardId, + currentShardGen, + listener + ) ); final ShardGeneration newShardGeneration = shardSnapshotResult.getGeneration(); @@ -107,8 +112,14 @@ public void testShardClone() throws Exception { assertTrue(snapshotFiles.get(0).isSame(snapshotFiles.get(1))); // verify that repeated cloning is idempotent - final ShardSnapshotResult shardSnapshotResult2 = PlainActionFuture.get( - f -> repository.cloneShardSnapshot(sourceSnapshotInfo.snapshotId(), targetSnapshotId, repositoryShardId, newShardGeneration, f) + final ShardSnapshotResult shardSnapshotResult2 = safeAwait( + listener -> repository.cloneShardSnapshot( + sourceSnapshotInfo.snapshotId(), + targetSnapshotId, + repositoryShardId, + newShardGeneration, + listener + ) ); assertEquals(newShardGeneration, shardSnapshotResult2.getGeneration()); assertEquals(shardSnapshotResult.getSegmentCount(), shardSnapshotResult2.getSegmentCount()); @@ -880,21 +891,12 @@ private static BlobStoreIndexShardSnapshots readShardGeneration( BlobStoreRepository repository, RepositoryShardId repositoryShardId, ShardGeneration generation - ) { - return PlainActionFuture.get( - f -> repository.threadPool() - .generic() - .execute( - ActionRunnable.supply( - f, - () -> BlobStoreRepository.INDEX_SHARD_SNAPSHOTS_FORMAT.read( - repository.getMetadata().name(), - repository.shardContainer(repositoryShardId.index(), repositoryShardId.shardId()), - generation.toBlobNamePart(), - NamedXContentRegistry.EMPTY - ) - ) - ) + ) throws IOException { + return BlobStoreRepository.INDEX_SHARD_SNAPSHOTS_FORMAT.read( + repository.getMetadata().name(), + repository.shardContainer(repositoryShardId.index(), repositoryShardId.shardId()), + generation.toBlobNamePart(), + NamedXContentRegistry.EMPTY ); } @@ -903,18 +905,6 @@ private static BlobStoreIndexShardSnapshot readShardSnapshot( RepositoryShardId repositoryShardId, SnapshotId snapshotId ) { - return PlainActionFuture.get( - f -> repository.threadPool() - .generic() - .execute( - ActionRunnable.supply( - f, - () -> repository.loadShardSnapshot( - repository.shardContainer(repositoryShardId.index(), repositoryShardId.shardId()), - snapshotId - ) - ) - ) - ); + return repository.loadShardSnapshot(repository.shardContainer(repositoryShardId.index(), repositoryShardId.shardId()), snapshotId); } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java index dc8921e9b7120..abcac0cade456 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java @@ -8,12 +8,10 @@ package 
org.elasticsearch.snapshots; import org.elasticsearch.action.ActionRequestBuilder; -import org.elasticsearch.action.ActionRunnable; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse; import org.elasticsearch.action.index.IndexRequestBuilder; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.client.internal.Client; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.Metadata; @@ -34,7 +32,6 @@ import org.elasticsearch.repositories.ShardGenerations; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.fs.FsRepository; -import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.XContentFactory; import java.nio.channels.SeekableByteChannel; @@ -309,18 +306,8 @@ public void testHandlingMissingRootLevelSnapshotMetadata() throws Exception { ); logger.info("--> verify that repo is assumed in old metadata format"); - final ThreadPool threadPool = internalCluster().getCurrentMasterNodeInstance(ThreadPool.class); assertThat( - PlainActionFuture.get( - // any other executor than generic and management - f -> threadPool.executor(ThreadPool.Names.SNAPSHOT) - .execute( - ActionRunnable.supply( - f, - () -> SnapshotsService.minCompatibleVersion(IndexVersion.current(), getRepositoryData(repoName), null) - ) - ) - ), + SnapshotsService.minCompatibleVersion(IndexVersion.current(), getRepositoryData(repoName), null), is(SnapshotsService.OLD_SNAPSHOT_FORMAT) ); @@ -329,15 +316,7 @@ public void testHandlingMissingRootLevelSnapshotMetadata() throws Exception { logger.info("--> verify that repository is assumed in new metadata format after removing corrupted snapshot"); assertThat( - PlainActionFuture.get( - f -> threadPool.generic() - .execute( - ActionRunnable.supply( - f, - () -> SnapshotsService.minCompatibleVersion(IndexVersion.current(), getRepositoryData(repoName), null) - ) - ) - ), + SnapshotsService.minCompatibleVersion(IndexVersion.current(), getRepositoryData(repoName), null), is(IndexVersion.current()) ); final RepositoryData finalRepositoryData = getRepositoryData(repoName); diff --git a/server/src/main/java/org/elasticsearch/action/support/PlainActionFuture.java b/server/src/main/java/org/elasticsearch/action/support/PlainActionFuture.java index 86ce24fc7a8bd..ee4433369f689 100644 --- a/server/src/main/java/org/elasticsearch/action/support/PlainActionFuture.java +++ b/server/src/main/java/org/elasticsearch/action/support/PlainActionFuture.java @@ -17,7 +17,6 @@ import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.util.concurrent.FutureUtils; import org.elasticsearch.common.util.concurrent.UncategorizedExecutionException; -import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.threadpool.ThreadPool; @@ -369,12 +368,6 @@ private static RuntimeException unwrapEsException(ElasticsearchException esEx) { return new UncategorizedExecutionException("Failed execution", root); } - public static T get(CheckedConsumer, E> e) throws E { - PlainActionFuture fut = new PlainActionFuture<>(); - e.accept(fut); - return fut.actionGet(); - } - private boolean assertCompleteAllowed() { Thread waiter = 
sync.getFirstQueuedThread(); assert waiter == null || allowedExecutors(waiter, Thread.currentThread()) diff --git a/server/src/main/java/org/elasticsearch/action/support/UnsafePlainActionFuture.java b/server/src/main/java/org/elasticsearch/action/support/UnsafePlainActionFuture.java index 2d9585bd26b5f..8aa6bc4de109a 100644 --- a/server/src/main/java/org/elasticsearch/action/support/UnsafePlainActionFuture.java +++ b/server/src/main/java/org/elasticsearch/action/support/UnsafePlainActionFuture.java @@ -9,7 +9,6 @@ package org.elasticsearch.action.support; import org.elasticsearch.common.util.concurrent.EsExecutors; -import org.elasticsearch.core.CheckedConsumer; import java.util.Objects; @@ -43,10 +42,4 @@ boolean allowedExecutors(Thread thread1, Thread thread2) { || unsafeExecutor2 == null || unsafeExecutor2.equals(EsExecutors.executorName(thread1)); } - - public static T get(CheckedConsumer, E> e, String allowedExecutor) throws E { - PlainActionFuture fut = new UnsafePlainActionFuture<>(allowedExecutor); - e.accept(fut); - return fut.actionGet(); - } } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 5b7a11969973d..a3b36f67b316b 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -501,7 +501,9 @@ protected void doClose() { @Override public void awaitIdle() { assert lifecycle.closed(); - PlainActionFuture.get(closedAndIdleListeners::addListener); + final var future = new PlainActionFuture(); + closedAndIdleListeners.addListener(future); + future.actionGet(); // wait for as long as it takes } @SuppressForbidden(reason = "legacy usage of unbatched task") // TODO add support for batching here diff --git a/server/src/test/java/org/elasticsearch/action/support/ThreadedActionListenerTests.java b/server/src/test/java/org/elasticsearch/action/support/ThreadedActionListenerTests.java index b5f07b7d8d087..36113d9aa931d 100644 --- a/server/src/test/java/org/elasticsearch/action/support/ThreadedActionListenerTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/ThreadedActionListenerTests.java @@ -131,16 +131,16 @@ public void testToString() { assertEquals( "ThreadedActionListener[DeterministicTaskQueue/forkingExecutor/NoopActionListener]/onResponse", - PlainActionFuture.get(future -> new ThreadedActionListener(deterministicTaskQueue.getThreadPool(s -> { - future.onResponse(s.toString()); + safeAwait(listener -> new ThreadedActionListener(deterministicTaskQueue.getThreadPool(s -> { + listener.onResponse(s.toString()); return s; }).generic(), randomBoolean(), ActionListener.noop()).onResponse(null)) ); assertEquals( "ThreadedActionListener[DeterministicTaskQueue/forkingExecutor/NoopActionListener]/onFailure", - PlainActionFuture.get(future -> new ThreadedActionListener(deterministicTaskQueue.getThreadPool(s -> { - future.onResponse(s.toString()); + safeAwait(listener -> new ThreadedActionListener(deterministicTaskQueue.getThreadPool(s -> { + listener.onResponse(s.toString()); return s; }).generic(), randomBoolean(), ActionListener.noop()).onFailure(new ElasticsearchException("test"))) ); diff --git a/server/src/test/java/org/elasticsearch/client/internal/ParentTaskAssigningClientTests.java b/server/src/test/java/org/elasticsearch/client/internal/ParentTaskAssigningClientTests.java index 
f0f44407642d8..600c09be2c12f 100644 --- a/server/src/test/java/org/elasticsearch/client/internal/ParentTaskAssigningClientTests.java +++ b/server/src/test/java/org/elasticsearch/client/internal/ParentTaskAssigningClientTests.java @@ -19,7 +19,6 @@ import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.search.ClearScrollRequest; import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.tasks.TaskId; import org.elasticsearch.test.ESTestCase; @@ -95,10 +94,11 @@ public void ); assertEquals( "fake remote-cluster client", - expectThrows( + asInstanceOf( UnsupportedOperationException.class, - () -> PlainActionFuture.get( - fut -> remoteClusterClient.execute(ClusterStateAction.REMOTE_TYPE, new ClusterStateRequest(), fut) + safeAwaitFailure( + ClusterStateResponse.class, + listener -> remoteClusterClient.execute(ClusterStateAction.REMOTE_TYPE, new ClusterStateRequest(), listener) ) ).getMessage() ); diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/NodeLeftExecutorTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/NodeLeftExecutorTests.java index 41ce520dc9bb6..0292dc42c3a4b 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/NodeLeftExecutorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/NodeLeftExecutorTests.java @@ -9,7 +9,6 @@ package org.elasticsearch.cluster.coordination; import org.apache.logging.log4j.Level; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.node.DiscoveryNode; @@ -26,6 +25,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; @@ -128,13 +128,11 @@ public void testPerNodeLogging() { "node-left: [" + nodeToRemove.descriptionWithoutAttributes() + "] with reason [test reason]" ) ); - assertNull( - PlainActionFuture.get( - future -> clusterService.getMasterService() - .createTaskQueue("test", Priority.NORMAL, executor) - .submitTask("test", new NodeLeftExecutor.Task(nodeToRemove, "test reason", () -> future.onResponse(null)), null) - ) - ); + final var latch = new CountDownLatch(1); + clusterService.getMasterService() + .createTaskQueue("test", Priority.NORMAL, executor) + .submitTask("test", new NodeLeftExecutor.Task(nodeToRemove, "test reason", latch::countDown), null); + safeAwait(latch); mockLog.assertAllExpectationsMatched(); } finally { TestThreadPool.terminate(threadPool, 10, TimeUnit.SECONDS); diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/stateless/AtomicRegisterPreVoteCollectorTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/stateless/AtomicRegisterPreVoteCollectorTests.java index 0659b65be5844..036959068d76e 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/stateless/AtomicRegisterPreVoteCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/stateless/AtomicRegisterPreVoteCollectorTests.java @@ -10,7 +10,6 @@ import org.apache.logging.log4j.Level; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.ClusterState; import 
org.elasticsearch.cluster.node.DiscoveryNodeUtils; import org.elasticsearch.core.TimeValue; @@ -66,7 +65,7 @@ protected long absoluteTimeInMillis() { // Either there's no heartbeat or is stale if (randomBoolean()) { - PlainActionFuture.get(f -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), f)); + safeAwait((ActionListener l) -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), l)); fakeClock.set(maxTimeSinceLastHeartbeat.millis() + randomLongBetween(0, 1000)); } @@ -107,7 +106,7 @@ protected long absoluteTimeInMillis() { } }; - PlainActionFuture.get(f -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), f)); + safeAwait((ActionListener l) -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), l)); fakeClock.addAndGet(randomLongBetween(0L, maxTimeSinceLastHeartbeat.millis() - 1)); var startElection = new AtomicBoolean(); @@ -141,7 +140,7 @@ protected long absoluteTimeInMillis() { } }; - PlainActionFuture.get(f -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), f)); + safeAwait((ActionListener l) -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), l)); var startElection = new AtomicBoolean(); var preVoteCollector = new AtomicRegisterPreVoteCollector(heartbeatService, () -> startElection.set(true)); diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/stateless/StoreHeartbeatServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/stateless/StoreHeartbeatServiceTests.java index 9a783d802a68c..ac985e50ca520 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/stateless/StoreHeartbeatServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/stateless/StoreHeartbeatServiceTests.java @@ -66,7 +66,7 @@ public void testHeartBeatStoreScheduling() { final var currentLeader = DiscoveryNodeUtils.create("master"); heartbeatService.start(currentLeader, currentTermProvider.get(), completionListener); - Heartbeat firstHeartbeat = PlainActionFuture.get(heartbeatStore::readLatestHeartbeat); + Heartbeat firstHeartbeat = safeAwait(heartbeatStore::readLatestHeartbeat); assertThat(firstHeartbeat, is(notNullValue())); assertThat(firstHeartbeat.term(), is(equalTo(1L))); assertThat(firstHeartbeat.absoluteTimeInMillis(), is(lessThanOrEqualTo(threadPool.absoluteTimeInMillis()))); @@ -79,7 +79,7 @@ public void testHeartBeatStoreScheduling() { assertThat(completionListener.isDone(), is(false)); - Heartbeat secondHeartbeat = PlainActionFuture.get(heartbeatStore::readLatestHeartbeat); + Heartbeat secondHeartbeat = safeAwait(heartbeatStore::readLatestHeartbeat); assertThat(secondHeartbeat, is(notNullValue())); assertThat(secondHeartbeat.term(), is(equalTo(1L))); assertThat(secondHeartbeat.absoluteTimeInMillis(), is(greaterThanOrEqualTo(firstHeartbeat.absoluteTimeInMillis()))); @@ -95,7 +95,7 @@ public void testHeartBeatStoreScheduling() { // No new tasks are scheduled after stopping the heart beat service assertThat(threadPool.scheduledTasks.poll(), is(nullValue())); - Heartbeat heartbeatAfterStoppingTheService = PlainActionFuture.get(heartbeatStore::readLatestHeartbeat); + Heartbeat heartbeatAfterStoppingTheService = safeAwait(heartbeatStore::readLatestHeartbeat); assertThat(heartbeatAfterStoppingTheService, is(equalTo(secondHeartbeat))); assertThat(completionListener.isDone(), is(false)); @@ -134,7 +134,7 @@ public void writeHeartbeat(Heartbeat newHeartbeat, ActionListener listener heartbeatService.start(currentLeader, 
currentTermProvider.get(), completionListener); if (failFirstHeartBeat == false) { - Heartbeat firstHeartbeat = PlainActionFuture.get(heartbeatStore::readLatestHeartbeat); + Heartbeat firstHeartbeat = safeAwait(heartbeatStore::readLatestHeartbeat); assertThat(firstHeartbeat, is(notNullValue())); var scheduledTask = threadPool.scheduledTasks.poll(); @@ -179,7 +179,7 @@ public void testServiceStopsAfterTermBump() throws Exception { heartbeatService.start(currentLeader, currentTerm, completionListener); if (termBumpBeforeStart == false) { - Heartbeat firstHeartbeat = PlainActionFuture.get(heartbeatStore::readLatestHeartbeat); + Heartbeat firstHeartbeat = safeAwait(heartbeatStore::readLatestHeartbeat); assertThat(firstHeartbeat, is(notNullValue())); var scheduledTask = threadPool.scheduledTasks.poll(); @@ -229,7 +229,7 @@ protected long absoluteTimeInMillis() { // Empty store { - Heartbeat heartbeat = PlainActionFuture.get(heartbeatStore::readLatestHeartbeat); + Heartbeat heartbeat = safeAwait(heartbeatStore::readLatestHeartbeat); assertThat(heartbeat, is(nullValue())); AtomicBoolean noRecentLeaderFound = new AtomicBoolean(); @@ -239,7 +239,7 @@ protected long absoluteTimeInMillis() { // Recent heartbeat { - PlainActionFuture.get(f -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), f)); + safeAwait((ActionListener l) -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), l)); AtomicBoolean noRecentLeaderFound = new AtomicBoolean(); heartbeatService.checkLeaderHeartbeatAndRun(() -> noRecentLeaderFound.set(true), hb -> {}); @@ -248,7 +248,7 @@ protected long absoluteTimeInMillis() { // Stale heartbeat { - PlainActionFuture.get(f -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), f)); + safeAwait((ActionListener l) -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), l)); fakeClock.set(maxTimeSinceLastHeartbeat.millis() + 1); AtomicBoolean noRecentLeaderFound = new AtomicBoolean(); @@ -258,7 +258,7 @@ protected long absoluteTimeInMillis() { // Failing store { - PlainActionFuture.get(f -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), f)); + safeAwait((ActionListener l) -> heartbeatStore.writeHeartbeat(new Heartbeat(1, fakeClock.get()), l)); fakeClock.set(maxTimeSinceLastHeartbeat.millis() + 1); failReadingHeartbeat.set(true); @@ -309,7 +309,7 @@ protected long absoluteTimeInMillis() { retryTask.v2().run(); - Heartbeat firstHeartbeat = PlainActionFuture.get(heartbeatStore::readLatestHeartbeat); + Heartbeat firstHeartbeat = safeAwait(heartbeatStore::readLatestHeartbeat); assertThat(firstHeartbeat, is(notNullValue())); assertThat(firstHeartbeat.term(), is(equalTo(1L))); diff --git a/server/src/test/java/org/elasticsearch/common/blobstore/fs/FsBlobContainerTests.java b/server/src/test/java/org/elasticsearch/common/blobstore/fs/FsBlobContainerTests.java index 8e96c020ac33b..e849f82e169cc 100644 --- a/server/src/test/java/org/elasticsearch/common/blobstore/fs/FsBlobContainerTests.java +++ b/server/src/test/java/org/elasticsearch/common/blobstore/fs/FsBlobContainerTests.java @@ -11,7 +11,6 @@ import org.apache.lucene.tests.mockfile.FilterSeekableByteChannel; import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.common.blobstore.BlobPath; import org.elasticsearch.common.blobstore.OptionalBytesReference; @@ -262,8 +261,8 @@ public void 
testRegisterContention() throws Exception { final var finalValue = randomValueOtherThan(startValue, () -> new BytesArray(randomByteArrayOfLength(8))); final var p = randomPurpose(); - assertTrue(PlainActionFuture.get(l -> container.compareAndSetRegister(p, contendedKey, BytesArray.EMPTY, startValue, l))); - assertTrue(PlainActionFuture.get(l -> container.compareAndSetRegister(p, uncontendedKey, BytesArray.EMPTY, startValue, l))); + assertTrue(safeAwait(l -> container.compareAndSetRegister(p, contendedKey, BytesArray.EMPTY, startValue, l))); + assertTrue(safeAwait(l -> container.compareAndSetRegister(p, uncontendedKey, BytesArray.EMPTY, startValue, l))); final var threads = new Thread[between(2, 5)]; final var startBarrier = new CyclicBarrier(threads.length + 1); @@ -274,7 +273,7 @@ public void testRegisterContention() throws Exception { // first thread does an uncontended write, which must succeed ? () -> { safeAwait(startBarrier); - final OptionalBytesReference result = PlainActionFuture.get( + final OptionalBytesReference result = safeAwait( l -> container.compareAndExchangeRegister(p, uncontendedKey, startValue, finalValue, l) ); // NB calling .bytesReference() asserts that the result is present, there was no contention @@ -284,7 +283,7 @@ public void testRegisterContention() throws Exception { : () -> { safeAwait(startBarrier); while (casSucceeded.get() == false) { - final OptionalBytesReference result = PlainActionFuture.get( + final OptionalBytesReference result = safeAwait( l -> container.compareAndExchangeRegister(p, contendedKey, startValue, finalValue, l) ); if (result.isPresent() && result.bytesReference().equals(startValue)) { @@ -302,7 +301,7 @@ public void testRegisterContention() throws Exception { for (var key : new String[] { contendedKey, uncontendedKey }) { // NB calling .bytesReference() asserts that the read did not experience contention assertThat( - PlainActionFuture.get(l -> container.getRegister(p, key, l)).bytesReference(), + safeAwait((ActionListener l) -> container.getRegister(p, key, l)).bytesReference(), oneOf(startValue, finalValue) ); } @@ -315,7 +314,7 @@ public void testRegisterContention() throws Exception { for (var key : new String[] { contendedKey, uncontendedKey }) { assertEquals( finalValue, - PlainActionFuture.get(l -> container.getRegister(p, key, l)).bytesReference() + safeAwait((ActionListener l) -> container.getRegister(p, key, l)).bytesReference() ); } } diff --git a/server/src/test/java/org/elasticsearch/common/component/LifecycleTests.java b/server/src/test/java/org/elasticsearch/common/component/LifecycleTests.java index bea074b100609..c6e2b72e79e6b 100644 --- a/server/src/test/java/org/elasticsearch/common/component/LifecycleTests.java +++ b/server/src/test/java/org/elasticsearch/common/component/LifecycleTests.java @@ -8,8 +8,8 @@ package org.elasticsearch.common.component; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRunnable; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.RefCountingListener; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.EsExecutors; @@ -99,8 +99,8 @@ private static class ThreadSafetyTestHarness implements Releasable { void testTransition(BooleanSupplier doTransition) { final var transitioned = new AtomicBoolean(); - PlainActionFuture.get(fut -> { - try (var listeners = new RefCountingListener(fut)) { + safeAwait((ActionListener listener) -> { + try (var listeners = new 
RefCountingListener(listener)) { for (int i = 0; i < threads; i++) { executor.execute(ActionRunnable.run(listeners.acquire(), () -> { safeAwait(barrier); diff --git a/server/src/test/java/org/elasticsearch/http/DefaultRestChannelTests.java b/server/src/test/java/org/elasticsearch/http/DefaultRestChannelTests.java index d49347a0dd3fc..5c871c8b912a0 100644 --- a/server/src/test/java/org/elasticsearch/http/DefaultRestChannelTests.java +++ b/server/src/test/java/org/elasticsearch/http/DefaultRestChannelTests.java @@ -12,7 +12,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.lucene.util.BytesRef; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.bytes.BytesArray; @@ -706,7 +705,7 @@ private static void writeContent(OutputStream bso, ChunkedRestResponseBodyPart c if (content.isLastPart()) { return; } - writeContent(bso, PlainActionFuture.get(content::getNextPart)); + writeContent(bso, safeAwait(content::getNextPart)); } }; diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardOperationPermitsTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardOperationPermitsTests.java index f900b05872a17..352013b6b6890 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardOperationPermitsTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardOperationPermitsTests.java @@ -538,11 +538,9 @@ public void testAsyncBlockOperationsOnRejection() { final var rejectingExecutor = threadPool.executor(REJECTING_EXECUTOR); rejectingExecutor.execute(threadBlock::actionGet); - expectThrows( - EsRejectedExecutionException.class, - () -> PlainActionFuture.get( - f -> permits.blockOperations(f, 1, TimeUnit.HOURS, rejectingExecutor) - ) + assertThat( + safeAwaitFailure(Releasable.class, l -> permits.blockOperations(l, 1, TimeUnit.HOURS, rejectingExecutor)), + instanceOf(EsRejectedExecutionException.class) ); // ensure that the exception means no block was put in place @@ -554,7 +552,7 @@ public void testAsyncBlockOperationsOnRejection() { } // ensure that another block can still be acquired - try (Releasable block = PlainActionFuture.get(f -> permits.blockOperations(f, 1, TimeUnit.HOURS, threadPool.generic()))) { + try (Releasable block = safeAwait(l -> permits.blockOperations(l, 1, TimeUnit.HOURS, threadPool.generic()))) { assertNotNull(block); } } @@ -566,11 +564,9 @@ public void testAsyncBlockOperationsOnTimeout() { assertEquals( "timeout while blocking operations after [0s]", - expectThrows( + asInstanceOf( ElasticsearchTimeoutException.class, - () -> PlainActionFuture.get( - f -> permits.blockOperations(f, 0, TimeUnit.SECONDS, threadPool.generic()) - ) + safeAwaitFailure(Releasable.class, f -> permits.blockOperations(f, 0, TimeUnit.SECONDS, threadPool.generic())) ).getMessage() ); @@ -584,7 +580,7 @@ public void testAsyncBlockOperationsOnTimeout() { } // ensure that another block can still be acquired - try (Releasable block = PlainActionFuture.get(f -> permits.blockOperations(f, 1, TimeUnit.HOURS, threadPool.generic()))) { + try (Releasable block = safeAwait(l -> permits.blockOperations(l, 1, TimeUnit.HOURS, threadPool.generic()))) { assertNotNull(block); } } diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index 
5aa47adfae4f8..f1b4b10405f3c 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -2098,9 +2098,17 @@ public void testShardCanNotBeMarkedAsRelocatedIfRelocationCancelled() throws IOE final ShardRouting relocationRouting = ShardRoutingHelper.relocate(originalRouting, "other_node"); IndexShardTestCase.updateRoutingEntry(shard, relocationRouting); IndexShardTestCase.updateRoutingEntry(shard, originalRouting); - expectThrows( + asInstanceOf( IllegalIndexShardStateException.class, - () -> blockingCallRelocated(shard, relocationRouting, (primaryContext, listener) -> fail("should not be called")) + safeAwaitFailure( + Void.class, + listener -> shard.relocated( + relocationRouting.relocatingNodeId(), + relocationRouting.getTargetRelocatingShard().allocationId().getId(), + (primaryContext, l) -> fail("should not be called"), + listener + ) + ) ); closeShards(shard); } @@ -2121,7 +2129,14 @@ public void onFailure(Exception e) { @Override protected void doRun() throws Exception { cyclicBarrier.await(); - blockingCallRelocated(shard, relocationRouting, (primaryContext, listener) -> listener.onResponse(null)); + final var relocatedCompleteLatch = new CountDownLatch(1); + shard.relocated( + relocationRouting.relocatingNodeId(), + relocationRouting.getTargetRelocatingShard().allocationId().getId(), + (primaryContext, listener) -> listener.onResponse(null), + ActionListener.releaseAfter(ActionListener.wrap(r -> {}, relocationException::set), relocatedCompleteLatch::countDown) + ); + safeAwait(relocatedCompleteLatch); } }); relocationThread.start(); @@ -2177,9 +2192,17 @@ public void testRelocateMismatchedTarget() throws Exception { final AtomicBoolean relocated = new AtomicBoolean(); - final IllegalIndexShardStateException wrongNodeException = expectThrows( + final IllegalIndexShardStateException wrongNodeException = asInstanceOf( IllegalIndexShardStateException.class, - () -> blockingCallRelocated(shard, wrongTargetNodeShardRouting, (ctx, listener) -> relocated.set(true)) + safeAwaitFailure( + Void.class, + listener -> shard.relocated( + wrongTargetNodeShardRouting.relocatingNodeId(), + wrongTargetNodeShardRouting.getTargetRelocatingShard().allocationId().getId(), + (ctx, l) -> relocated.set(true), + listener + ) + ) ); assertThat( wrongNodeException.getMessage(), @@ -2187,9 +2210,17 @@ public void testRelocateMismatchedTarget() throws Exception { ); assertFalse(relocated.get()); - final IllegalStateException wrongTargetIdException = expectThrows( + final IllegalStateException wrongTargetIdException = asInstanceOf( IllegalStateException.class, - () -> blockingCallRelocated(shard, wrongTargetAllocationIdShardRouting, (ctx, listener) -> relocated.set(true)) + safeAwaitFailure( + Void.class, + listener -> shard.relocated( + wrongTargetAllocationIdShardRouting.relocatingNodeId(), + wrongTargetAllocationIdShardRouting.getTargetRelocatingShard().allocationId().getId(), + (ctx, l) -> relocated.set(true), + listener + ) + ) ); assertThat( wrongTargetIdException.getMessage(), @@ -5023,8 +5054,13 @@ private static void blockingCallRelocated( ShardRouting routing, BiConsumer> consumer ) { - PlainActionFuture.get( - f -> indexShard.relocated(routing.relocatingNodeId(), routing.getTargetRelocatingShard().allocationId().getId(), consumer, f) + safeAwait( + (ActionListener listener) -> indexShard.relocated( + routing.relocatingNodeId(), + routing.getTargetRelocatingShard().allocationId().getId(), + consumer, 
+ listener + ) ); } } diff --git a/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryRestoreTests.java b/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryRestoreTests.java index ce732a3b95a34..45cd944a4a926 100644 --- a/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryRestoreTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryRestoreTests.java @@ -10,7 +10,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.util.TestUtil; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.routing.RecoverySource; @@ -171,8 +170,8 @@ public void testSnapshotWithConflictingName() throws Exception { new SnapshotId(snapshot.getSnapshotId().getName(), "_uuid2") ); final ShardGenerations shardGenerations = ShardGenerations.builder().put(indexId, 0, shardGen).build(); - PlainActionFuture.get( - f -> repository.finalizeSnapshot( + final RepositoryData ignoredRepositoryData = safeAwait( + listener -> repository.finalizeSnapshot( new FinalizeSnapshotContext( shardGenerations, RepositoryData.EMPTY_REPO_GEN, @@ -192,7 +191,7 @@ public void testSnapshotWithConflictingName() throws Exception { Collections.emptyMap() ), IndexVersion.current(), - f, + listener, info -> {} ) ) diff --git a/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryTests.java b/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryTests.java index ac23f646e5c52..29c858d49a0b6 100644 --- a/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryTests.java @@ -87,6 +87,7 @@ import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.nullValue; @@ -233,7 +234,7 @@ public void testCorruptIndexLatestFile() throws Exception { } } - public void testRepositoryDataConcurrentModificationNotAllowed() throws Exception { + public void testRepositoryDataConcurrentModificationNotAllowed() { final BlobStoreRepository repository = setupRepo(); // write to index generational file @@ -244,7 +245,20 @@ public void testRepositoryDataConcurrentModificationNotAllowed() throws Exceptio // write repo data again to index generational file, errors because we already wrote to the // N+1 generation from which this repository data instance was created final RepositoryData fresherRepositoryData = repositoryData.withGenId(startingGeneration + 1); - expectThrows(RepositoryException.class, () -> writeIndexGen(repository, fresherRepositoryData, repositoryData.getGenId())); + + assertThat( + safeAwaitFailure( + RepositoryData.class, + listener -> repository.writeIndexGen( + fresherRepositoryData, + repositoryData.getGenId(), + IndexVersion.current(), + Function.identity(), + listener + ) + ), + instanceOf(RepositoryException.class) + ); } public void testBadChunksize() { @@ -330,9 +344,15 @@ public void testRepositoryDataDetails() throws Exception { 
snapshotDetailsAsserter.accept(AbstractSnapshotIntegTestCase.getRepositoryData(repository).getSnapshotDetails(snapshotId)); } - private static void writeIndexGen(BlobStoreRepository repository, RepositoryData repositoryData, long generation) throws Exception { - PlainActionFuture.get( - f -> repository.writeIndexGen(repositoryData, generation, IndexVersion.current(), Function.identity(), f) + private static void writeIndexGen(BlobStoreRepository repository, RepositoryData repositoryData, long generation) { + safeAwait( + (ActionListener listener) -> repository.writeIndexGen( + repositoryData, + generation, + IndexVersion.current(), + Function.identity(), + listener + ) ); } diff --git a/server/src/test/java/org/elasticsearch/transport/ClusterConnectionManagerTests.java b/server/src/test/java/org/elasticsearch/transport/ClusterConnectionManagerTests.java index 1a3e61d7eebfc..e97fb3220923d 100644 --- a/server/src/test/java/org/elasticsearch/transport/ClusterConnectionManagerTests.java +++ b/server/src/test/java/org/elasticsearch/transport/ClusterConnectionManagerTests.java @@ -119,7 +119,7 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti validatedConnectionRef.set(c); l.onResponse(null); }; - PlainActionFuture.get(fut -> connectionManager.connectToNode(node, connectionProfile, validator, fut.map(x -> null))); + safeAwait(listener -> connectionManager.connectToNode(node, connectionProfile, validator, listener.map(x -> null))); assertFalse(connection.isClosed()); assertTrue(connectionManager.nodeConnected(node)); @@ -166,9 +166,9 @@ public void testDisconnectLogging() { final ConnectionManager.ConnectionValidator validator = (c, p, l) -> l.onResponse(null); final AtomicReference toClose = new AtomicReference<>(); - PlainActionFuture.get(f -> connectionManager.connectToNode(remoteClose, connectionProfile, validator, f.map(x -> null))); - PlainActionFuture.get(f -> connectionManager.connectToNode(shutdownClose, connectionProfile, validator, f.map(x -> null))); - PlainActionFuture.get(f -> connectionManager.connectToNode(localClose, connectionProfile, validator, f.map(toClose::getAndSet))); + safeAwait(l -> connectionManager.connectToNode(remoteClose, connectionProfile, validator, l.map(x -> null))); + safeAwait(l -> connectionManager.connectToNode(shutdownClose, connectionProfile, validator, l.map(x -> null))); + safeAwait(l -> connectionManager.connectToNode(localClose, connectionProfile, validator, l.map(toClose::getAndSet))); final Releasable localConnectionRef = toClose.getAndSet(null); assertThat(localConnectionRef, notNullValue()); diff --git a/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java b/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java index 8a10af0843d44..d2e885f8da4be 100644 --- a/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java +++ b/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java @@ -7,6 +7,7 @@ */ package org.elasticsearch.transport; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.TransportVersion; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.cluster.state.ClusterStateAction; @@ -40,6 +41,7 @@ import static org.elasticsearch.transport.AbstractSimpleTransportTestCase.IGNORE_DESERIALIZATION_ERRORS_SETTING; import static org.elasticsearch.transport.RemoteClusterConnectionTests.startTransport; import static org.hamcrest.Matchers.equalTo; +import static 
org.hamcrest.Matchers.instanceOf; public class RemoteClusterClientTests extends ESTestCase { @@ -112,10 +114,13 @@ public void testConnectAndExecuteRequest() throws Exception { assertNotNull(clusterStateResponse); assertEquals("foo_bar_cluster", clusterStateResponse.getState().getClusterName().value()); // also test a failure, there is no handler for scroll registered - ActionNotFoundTransportException ex = expectThrows( + ActionNotFoundTransportException ex = asInstanceOf( ActionNotFoundTransportException.class, - () -> PlainActionFuture.get( - future -> client.execute(TransportSearchScrollAction.REMOTE_TYPE, new SearchScrollRequest(""), future) + ExceptionsHelper.unwrapCause( + safeAwaitFailure( + SearchResponse.class, + listener -> client.execute(TransportSearchScrollAction.REMOTE_TYPE, new SearchScrollRequest(""), listener) + ) ) ); assertEquals("No handler for action [indices:data/read/scroll]", ex.getMessage()); @@ -178,8 +183,8 @@ public void testEnsureWeReconnect() throws Exception { RemoteClusterService.DisconnectedStrategy.RECONNECT_UNLESS_SKIP_UNAVAILABLE ) ); - ClusterStateResponse clusterStateResponse = PlainActionFuture.get( - f -> client.execute(ClusterStateAction.REMOTE_TYPE, new ClusterStateRequest(), f) + ClusterStateResponse clusterStateResponse = safeAwait( + listener -> client.execute(ClusterStateAction.REMOTE_TYPE, new ClusterStateRequest(), listener) ); assertNotNull(clusterStateResponse); assertEquals("foo_bar_cluster", clusterStateResponse.getState().getClusterName().value()); @@ -265,11 +270,12 @@ public void testQuicklySkipUnavailableClusters() throws Exception { assertFalse(remoteClusterService.isRemoteNodeConnected("test", remoteNode)); // check that we quickly fail - expectThrows( - ConnectTransportException.class, - () -> PlainActionFuture.get( - f -> client.execute(ClusterStateAction.REMOTE_TYPE, new ClusterStateRequest(), f) - ) + ESTestCase.assertThat( + safeAwaitFailure( + ClusterStateResponse.class, + listener -> client.execute(ClusterStateAction.REMOTE_TYPE, new ClusterStateRequest(), listener) + ), + instanceOf(ConnectTransportException.class) ); } finally { service.clearAllRules(); @@ -277,14 +283,10 @@ public void testQuicklySkipUnavailableClusters() throws Exception { } assertBusy(() -> { - try { - PlainActionFuture.get( - f -> client.execute(ClusterStateAction.REMOTE_TYPE, new ClusterStateRequest(), f) - ); - } catch (ConnectTransportException e) { - // keep retrying on this exception, the goal is to check that we eventually reconnect - throw new AssertionError(e); - } + ClusterStateResponse ignored = safeAwait( + listener -> client.execute(ClusterStateAction.REMOTE_TYPE, new ClusterStateRequest(), listener) + ); + // keep retrying on an exception, the goal is to check that we eventually reconnect }); assertTrue(remoteClusterService.isRemoteNodeConnected("test", remoteNode)); } diff --git a/server/src/test/java/org/elasticsearch/transport/RemoteClusterConnectionTests.java b/server/src/test/java/org/elasticsearch/transport/RemoteClusterConnectionTests.java index 77a57bf1110fb..23f6246e9191d 100644 --- a/server/src/test/java/org/elasticsearch/transport/RemoteClusterConnectionTests.java +++ b/server/src/test/java/org/elasticsearch/transport/RemoteClusterConnectionTests.java @@ -922,7 +922,7 @@ public void testGetConnection() throws Exception { RemoteClusterCredentialsManager.EMPTY ) ) { - PlainActionFuture.get(fut -> connection.ensureConnected(fut.map(x -> null))); + safeAwait(listener -> connection.ensureConnected(listener.map(x -> null))); for (int 
i = 0; i < 10; i++) { // always a direct connection as the remote node is already connected Transport.Connection remoteConnection = connection.getConnection(seedNode); diff --git a/server/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java b/server/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java index c5034f51d1e26..6f02c354b7485 100644 --- a/server/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java +++ b/server/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java @@ -13,7 +13,6 @@ import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.Version; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeUtils; import org.elasticsearch.cluster.node.VersionInformation; @@ -21,6 +20,7 @@ import org.elasticsearch.common.network.NetworkService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.PageCacheRecycler; +import org.elasticsearch.core.Releasable; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.IndexVersion; @@ -147,7 +147,7 @@ public void testConnectToNodeLight() { TestProfiles.LIGHT_PROFILE ) ) { - DiscoveryNode connectedNode = PlainActionFuture.get(fut -> transportServiceA.handshake(connection, timeout, fut)); + DiscoveryNode connectedNode = safeAwait(listener -> transportServiceA.handshake(connection, timeout, listener)); assertNotNull(connectedNode); // the name and version should be updated assertEquals(connectedNode.getName(), "TS_B"); @@ -177,21 +177,23 @@ public void testMismatchedClusterName() { .roles(emptySet()) .version(Version.CURRENT.minimumCompatibilityVersion(), IndexVersions.MINIMUM_COMPATIBLE, IndexVersion.current()) .build(); - IllegalStateException ex = expectThrows(IllegalStateException.class, () -> { - try ( - Transport.Connection connection = AbstractSimpleTransportTestCase.openConnection( - transportServiceA, - discoveryNode, - TestProfiles.LIGHT_PROFILE + try ( + Transport.Connection connection = AbstractSimpleTransportTestCase.openConnection( + transportServiceA, + discoveryNode, + TestProfiles.LIGHT_PROFILE + ) + ) { + assertThat( + asInstanceOf( + IllegalStateException.class, + safeAwaitFailure(DiscoveryNode.class, listener -> transportServiceA.handshake(connection, timeout, listener)) + ).getMessage(), + containsString( + "handshake with [" + discoveryNode + "] failed: remote cluster name [b] does not match local cluster name [a]" ) - ) { - PlainActionFuture.get(fut -> transportServiceA.handshake(connection, timeout, fut.map(x -> null))); - } - }); - assertThat( - ex.getMessage(), - containsString("handshake with [" + discoveryNode + "] failed: remote cluster name [b] does not match local cluster name [a]") - ); + ); + } assertFalse(transportServiceA.nodeConnected(discoveryNode)); } @@ -220,29 +222,29 @@ public void testIncompatibleNodeVersions() { .roles(emptySet()) .version(Version.CURRENT.minimumCompatibilityVersion(), IndexVersions.MINIMUM_COMPATIBLE, IndexVersion.current()) .build(); - IllegalStateException ex = expectThrows(IllegalStateException.class, () -> { - try ( - Transport.Connection connection = AbstractSimpleTransportTestCase.openConnection( - transportServiceA, - discoveryNode, - TestProfiles.LIGHT_PROFILE - ) - ) { - PlainActionFuture.get(fut -> 
transportServiceA.handshake(connection, timeout, fut.map(x -> null))); - } - }); - assertThat( - ex.getMessage(), - containsString( - "handshake with [" - + discoveryNode - + "] failed: remote node version [" - + transportServiceB.getLocalNode().getVersion() - + "] is incompatible with local node version [" - + Version.CURRENT - + "]" + try ( + Transport.Connection connection = AbstractSimpleTransportTestCase.openConnection( + transportServiceA, + discoveryNode, + TestProfiles.LIGHT_PROFILE ) - ); + ) { + assertThat( + asInstanceOf( + IllegalStateException.class, + safeAwaitFailure(DiscoveryNode.class, listener -> transportServiceA.handshake(connection, timeout, listener)) + ).getMessage(), + containsString( + "handshake with [" + + discoveryNode + + "] failed: remote node version [" + + transportServiceB.getLocalNode().getVersion() + + "] is incompatible with local node version [" + + Version.CURRENT + + "]" + ) + ); + } assertFalse(transportServiceA.nodeConnected(discoveryNode)); } @@ -267,17 +269,13 @@ public void testIncompatibleTransportVersions() { .roles(emptySet()) .version(Version.CURRENT.minimumCompatibilityVersion(), IndexVersions.MINIMUM_COMPATIBLE, IndexVersion.current()) .build(); - expectThrows(ConnectTransportException.class, () -> { - try ( - Transport.Connection connection = AbstractSimpleTransportTestCase.openConnection( - transportServiceA, - discoveryNode, - TestProfiles.LIGHT_PROFILE - ) - ) { - PlainActionFuture.get(fut -> transportServiceA.handshake(connection, timeout, fut.map(x -> null))); - } - }); + assertThat( + safeAwaitFailure( + Transport.Connection.class, + listener -> transportServiceA.openConnection(discoveryNode, TestProfiles.LIGHT_PROFILE, listener) + ), + instanceOf(ConnectTransportException.class) + ); // the error is exposed as a general connection exception, the actual message is in the logs assertFalse(transportServiceA.nodeConnected(discoveryNode)); } @@ -303,12 +301,14 @@ public void testNodeConnectWithDifferentNodeId() { .roles(emptySet()) .version(transportServiceB.getLocalNode().getVersionInformation()) .build(); - ConnectTransportException ex = expectThrows( - ConnectTransportException.class, - () -> AbstractSimpleTransportTestCase.connectToNode(transportServiceA, discoveryNode, TestProfiles.LIGHT_PROFILE) - ); assertThat( - ex.getMessage(), + asInstanceOf( + ConnectTransportException.class, + safeAwaitFailure( + Releasable.class, + listener -> transportServiceA.connectToNode(discoveryNode, TestProfiles.LIGHT_PROFILE, listener) + ) + ).getMessage(), allOf( containsString("Connecting to [" + discoveryNode.getAddress() + "] failed"), containsString("expected to connect to [" + discoveryNode.descriptionWithoutAttributes() + "]"), @@ -350,21 +350,24 @@ public void testRejectsMismatchedBuildHash() { .roles(emptySet()) .version(Version.CURRENT.minimumCompatibilityVersion(), IndexVersions.MINIMUM_COMPATIBLE, IndexVersion.current()) .build(); - TransportSerializationException ex = expectThrows(TransportSerializationException.class, () -> { - try ( - Transport.Connection connection = AbstractSimpleTransportTestCase.openConnection( - transportServiceA, - discoveryNode, - TestProfiles.LIGHT_PROFILE - ) - ) { - PlainActionFuture.get(fut -> transportServiceA.handshake(connection, timeout, fut.map(x -> null))); - } - }); - assertThat( - ExceptionsHelper.unwrap(ex, IllegalArgumentException.class).getMessage(), - containsString("which has an incompatible wire format") - ); + try ( + Transport.Connection connection = 
AbstractSimpleTransportTestCase.openConnection( + transportServiceA, + discoveryNode, + TestProfiles.LIGHT_PROFILE + ) + ) { + assertThat( + ExceptionsHelper.unwrap( + asInstanceOf( + TransportSerializationException.class, + safeAwaitFailure(DiscoveryNode.class, listener -> transportServiceA.handshake(connection, timeout, listener)) + ), + IllegalArgumentException.class + ).getMessage(), + containsString("which has an incompatible wire format") + ); + } assertFalse(transportServiceA.nodeConnected(discoveryNode)); } diff --git a/test/framework/src/main/java/org/elasticsearch/action/support/TestPlainActionFuture.java b/test/framework/src/main/java/org/elasticsearch/action/support/TestPlainActionFuture.java new file mode 100644 index 0000000000000..0264920c9d017 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/action/support/TestPlainActionFuture.java @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.support; + +/** + * A {@link PlainActionFuture} which bypasses the deadlock-detection checks since we're only using this in tests. + */ +public class TestPlainActionFuture extends PlainActionFuture { + @Override + boolean allowedExecutors(Thread thread1, Thread thread2) { + return true; + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java index 6951e1941686d..ced1de370e0dd 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java @@ -11,7 +11,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.SetOnce; -import org.elasticsearch.action.ActionRunnable; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequestBuilder; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequestBuilder; @@ -292,12 +291,10 @@ protected BlobStore newBlobStore() { } protected BlobStore newBlobStore(String repository) { - final BlobStoreRepository blobStoreRepository = (BlobStoreRepository) internalCluster().getAnyMasterNodeInstance( - RepositoriesService.class - ).repository(repository); - return PlainActionFuture.get( - f -> blobStoreRepository.threadPool().generic().execute(ActionRunnable.supply(f, blobStoreRepository::blobStore)) - ); + return asInstanceOf( + BlobStoreRepository.class, + internalCluster().getAnyMasterNodeInstance(RepositoriesService.class).repository(repository) + ).blobStore(); } public void testSnapshotAndRestore() throws Exception { diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java index b4bc93cd82889..1b49209b49c7f 100644 --- 
a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -11,7 +11,6 @@ import org.elasticsearch.Version; import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ActionRunnable; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.get.SnapshotSortKey; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -425,20 +424,11 @@ protected String initWithSnapshotVersion(String repoName, Path repoPath, IndexVe downgradedSnapshotInfo = SnapshotInfo.fromXContentInternal(repoName, parser); } final BlobStoreRepository blobStoreRepository = getRepositoryOnMaster(repoName); - PlainActionFuture.get( - f -> blobStoreRepository.threadPool() - .generic() - .execute( - ActionRunnable.run( - f, - () -> BlobStoreRepository.SNAPSHOT_FORMAT.write( - downgradedSnapshotInfo, - blobStoreRepository.blobStore().blobContainer(blobStoreRepository.basePath()), - snapshotInfo.snapshotId().getUUID(), - randomBoolean() - ) - ) - ) + BlobStoreRepository.SNAPSHOT_FORMAT.write( + downgradedSnapshotInfo, + blobStoreRepository.blobStore().blobContainer(blobStoreRepository.basePath()), + snapshotInfo.snapshotId().getUUID(), + randomBoolean() ); final RepositoryMetadata repoMetadata = blobStoreRepository.getMetadata(); @@ -550,15 +540,15 @@ protected void addBwCFailedSnapshot(String repoName, String snapshotName, Mapget( - f -> repo.finalizeSnapshot( + safeAwait( + (ActionListener listener) -> repo.finalizeSnapshot( new FinalizeSnapshotContext( ShardGenerations.EMPTY, getRepositoryData(repoName).getGenId(), state.metadata(), snapshotInfo, SnapshotsService.OLD_SNAPSHOT_FORMAT, - f, + listener, info -> {} ) ) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 95f77d5cfc00e..dc8a1dc29d233 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -44,8 +44,8 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.RequestBuilder; import org.elasticsearch.action.support.ActionTestUtils; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.SubscribableListener; +import org.elasticsearch.action.support.TestPlainActionFuture; import org.elasticsearch.bootstrap.BootstrapForTesting; import org.elasticsearch.client.internal.Requests; import org.elasticsearch.cluster.ClusterModule; @@ -2287,7 +2287,7 @@ public static void safeAcquire(int permits, Semaphore semaphore) { * @return The value with which the {@code listener} was completed. 
*/ public static T safeAwait(SubscribableListener listener) { - final var future = new PlainActionFuture(); + final var future = new TestPlainActionFuture(); listener.addListener(future); return safeGet(future); } diff --git a/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java b/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java index 6e9672e924043..e264e25641795 100644 --- a/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java @@ -1868,8 +1868,8 @@ public void handleException(TransportException exp) { assertBusy(() -> assertFalse(serviceB.nodeConnected(nodeA))); // now try to connect again and see that it fails - expectThrows(ConnectTransportException.class, () -> connectToNode(serviceB, nodeA)); - expectThrows(ConnectTransportException.class, () -> openConnection(serviceB, nodeA, TestProfiles.LIGHT_PROFILE)); + assertNotNull(connectToNodeExpectFailure(serviceB, nodeA, null)); + assertNotNull(openConnectionExpectFailure(serviceB, nodeA, TestProfiles.LIGHT_PROFILE)); } public void testMockUnresponsiveRule() throws InterruptedException { @@ -1916,11 +1916,9 @@ public void handleException(TransportException exp) { ); assertThat(expectThrows(ExecutionException.class, res::get).getCause(), instanceOf(ReceiveTimeoutTransportException.class)); - expectThrows(ConnectTransportException.class, () -> { - serviceB.disconnectFromNode(nodeA); - connectToNode(serviceB, nodeA); - }); - expectThrows(ConnectTransportException.class, () -> openConnection(serviceB, nodeA, TestProfiles.LIGHT_PROFILE)); + serviceB.disconnectFromNode(nodeA); + assertNotNull(connectToNodeExpectFailure(serviceB, nodeA, null)); + assertNotNull(openConnectionExpectFailure(serviceB, nodeA, TestProfiles.LIGHT_PROFILE)); } public void testHostOnMessages() throws InterruptedException { @@ -2342,7 +2340,7 @@ public void testHandshakeWithIncompatVersion() { TransportRequestOptions.Type.REG, TransportRequestOptions.Type.STATE ); - expectThrows(ConnectTransportException.class, () -> openConnection(serviceA, node, builder.build())); + assertNotNull(openConnectionExpectFailure(serviceA, node, builder.build())); } } @@ -2448,10 +2446,7 @@ public void testTcpHandshakeTimeout() throws IOException { TransportRequestOptions.Type.STATE ); builder.setHandshakeTimeout(TimeValue.timeValueMillis(1)); - ConnectTransportException ex = expectThrows( - ConnectTransportException.class, - () -> connectToNode(serviceA, dummy, builder.build()) - ); + ConnectTransportException ex = connectToNodeExpectFailure(serviceA, dummy, builder.build()); assertEquals("[][" + dummy.getAddress() + "] handshake_timeout[1ms]", ex.getMessage()); } } @@ -2488,10 +2483,7 @@ public void run() { TransportRequestOptions.Type.STATE ); builder.setHandshakeTimeout(TimeValue.timeValueHours(1)); - ConnectTransportException ex = expectThrows( - ConnectTransportException.class, - () -> connectToNode(serviceA, dummy, builder.build()) - ); + ConnectTransportException ex = connectToNodeExpectFailure(serviceA, dummy, builder.build()); assertEquals("[][" + dummy.getAddress() + "] general node connection failure", ex.getMessage()); assertThat(ex.getCause().getMessage(), startsWith("handshake failed")); t.join(); @@ -3160,10 +3152,7 @@ public void onConnectionClosed(Transport.Connection connection) { TransportRequestOptions.Type.REG, TransportRequestOptions.Type.STATE ); - 
final ConnectTransportException e = expectThrows( - ConnectTransportException.class, - () -> openConnection(service, nodeA, builder.build()) - ); + final ConnectTransportException e = openConnectionExpectFailure(service, nodeA, builder.build()); assertThat(e, hasToString(containsString(("a channel closed while connecting")))); assertTrue(connectionClosedListenerCalled.get()); } @@ -3506,7 +3495,21 @@ public static void connectToNode(TransportService service, DiscoveryNode node) t * @param connectionProfile the connection profile to use when connecting to this node */ public static void connectToNode(TransportService service, DiscoveryNode node, ConnectionProfile connectionProfile) { - UnsafePlainActionFuture.get(fut -> service.connectToNode(node, connectionProfile, fut.map(x -> null)), ThreadPool.Names.GENERIC); + safeAwait(listener -> service.connectToNode(node, connectionProfile, listener.map(ignored -> null))); + } + + /** + * Attempt to connect to the specified node, but assert that this fails and return the resulting exception. + */ + public static ConnectTransportException connectToNodeExpectFailure( + TransportService service, + DiscoveryNode node, + ConnectionProfile connectionProfile + ) { + return asInstanceOf( + ConnectTransportException.class, + safeAwaitFailure(Releasable.class, listener -> service.connectToNode(node, connectionProfile, listener)) + ); } /** @@ -3517,7 +3520,21 @@ public static void connectToNode(TransportService service, DiscoveryNode node, C * @param connectionProfile the connection profile to use */ public static Transport.Connection openConnection(TransportService service, DiscoveryNode node, ConnectionProfile connectionProfile) { - return PlainActionFuture.get(fut -> service.openConnection(node, connectionProfile, fut)); + return safeAwait(listener -> service.openConnection(node, connectionProfile, listener)); + } + + /** + * Attempt to connect to the specified node, but assert that this fails and return the resulting exception. 
+ */ + public static ConnectTransportException openConnectionExpectFailure( + TransportService service, + DiscoveryNode node, + ConnectionProfile connectionProfile + ) { + return asInstanceOf( + ConnectTransportException.class, + safeAwaitFailure(Transport.Connection.class, listener -> service.openConnection(node, connectionProfile, listener)) + ); } public static Future submitRequest( diff --git a/test/framework/src/test/java/org/elasticsearch/transport/DisruptableMockTransportTests.java b/test/framework/src/test/java/org/elasticsearch/transport/DisruptableMockTransportTests.java index 9582d28327122..f79dbbd1a9b35 100644 --- a/test/framework/src/test/java/org/elasticsearch/transport/DisruptableMockTransportTests.java +++ b/test/framework/src/test/java/org/elasticsearch/transport/DisruptableMockTransportTests.java @@ -593,32 +593,28 @@ public void testBrokenLinkFailsToConnect() { disconnectedLinks.add(Tuple.tuple(node1, node2)); assertThat( - expectThrows(ConnectTransportException.class, () -> AbstractSimpleTransportTestCase.connectToNode(service1, node2)) - .getMessage(), + AbstractSimpleTransportTestCase.connectToNodeExpectFailure(service1, node2, null).getMessage(), endsWith("is [DISCONNECTED] not [CONNECTED]") ); disconnectedLinks.clear(); blackholedLinks.add(Tuple.tuple(node1, node2)); assertThat( - expectThrows(ConnectTransportException.class, () -> AbstractSimpleTransportTestCase.connectToNode(service1, node2)) - .getMessage(), + AbstractSimpleTransportTestCase.connectToNodeExpectFailure(service1, node2, null).getMessage(), endsWith("is [BLACK_HOLE] not [CONNECTED]") ); blackholedLinks.clear(); blackholedRequestLinks.add(Tuple.tuple(node1, node2)); assertThat( - expectThrows(ConnectTransportException.class, () -> AbstractSimpleTransportTestCase.connectToNode(service1, node2)) - .getMessage(), + AbstractSimpleTransportTestCase.connectToNodeExpectFailure(service1, node2, null).getMessage(), endsWith("is [BLACK_HOLE_REQUESTS_ONLY] not [CONNECTED]") ); blackholedRequestLinks.clear(); final DiscoveryNode node3 = DiscoveryNodeUtils.create("node3"); assertThat( - expectThrows(ConnectTransportException.class, () -> AbstractSimpleTransportTestCase.connectToNode(service1, node3)) - .getMessage(), + AbstractSimpleTransportTestCase.connectToNodeExpectFailure(service1, node3, null).getMessage(), endsWith("does not exist") ); } diff --git a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java index edeed9a16034a..5d341897de57b 100644 --- a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java +++ b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java @@ -591,19 +591,15 @@ public void testFetchFullCacheEntryConcurrently() throws Exception { threads[i] = new Thread(() -> { for (int j = 0; j < 1000; j++) { final var cacheKey = generateCacheKey(); - try { - PlainActionFuture.get( - f -> cacheService.maybeFetchFullEntry( - cacheKey, - size, - (channel, channelPos, relativePos, length, progressUpdater) -> progressUpdater.accept(length), - bulkExecutor, - f - ) - ); - } catch (Exception e) { - throw new AssertionError(e); - } + safeAwait( + (ActionListener listener) -> cacheService.maybeFetchFullEntry( + cacheKey, + size, + (channel, channelPos, relativePos, length, progressUpdater) -> progressUpdater.accept(length), + bulkExecutor, + 
listener + ) + ); } }); } diff --git a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/ShardFollowTaskReplicationTests.java b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/ShardFollowTaskReplicationTests.java index 04a97ad9e7f95..5cd9f8bc5b78c 100644 --- a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/ShardFollowTaskReplicationTests.java +++ b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/ShardFollowTaskReplicationTests.java @@ -834,12 +834,12 @@ protected void adaptResponse(BulkShardOperationsResponse response, IndexShard in @Override protected void performOnReplica(BulkShardOperationsRequest request, IndexShard replica) throws Exception { try ( - Releasable ignored = PlainActionFuture.get( - f -> replica.acquireReplicaOperationPermit( + Releasable ignored = safeAwait( + listener -> replica.acquireReplicaOperationPermit( getPrimaryShard().getPendingPrimaryTerm(), getPrimaryShard().getLastKnownGlobalCheckpoint(), getPrimaryShard().getMaxSeqNoOfUpdatesOrDeletes(), - f, + ActionListener.assertOnce(listener), EsExecutors.DIRECT_EXECUTOR_SERVICE ) ) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotShardTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotShardTests.java index 34abaeb4cdf29..e39ddc170c0a9 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotShardTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotShardTests.java @@ -363,13 +363,11 @@ public void onFailure(Exception e) { indexId ) ).build(); - IndexMetadata metadata = runAsSnapshot( - threadPool, - () -> repository.getSnapshotIndexMetaData( - PlainActionFuture.get(listener -> repository.getRepositoryData(EsExecutors.DIRECT_EXECUTOR_SERVICE, listener)), - snapshotId, - indexId - ) + + IndexMetadata metadata = repository.getSnapshotIndexMetaData( + safeAwait(listener -> repository.getRepositoryData(EsExecutors.DIRECT_EXECUTOR_SERVICE, listener)), + snapshotId, + indexId ); IndexShard restoredShard = newShard( shardRouting, diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/netty4/SimpleSecurityNetty4ServerTransportTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/netty4/SimpleSecurityNetty4ServerTransportTests.java index 74b02c1d63bbf..888e858f2b039 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/netty4/SimpleSecurityNetty4ServerTransportTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/netty4/SimpleSecurityNetty4ServerTransportTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.action.support.TestPlainActionFuture; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodeUtils; @@ -34,6 +35,7 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.PageCacheRecycler; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Releasable; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.core.TimeValue; import org.elasticsearch.env.TestEnvironment; @@ 
-80,6 +82,7 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -167,19 +170,15 @@ protected Set> getSupportedSettings() { } public void testConnectException() throws UnknownHostException { - try { - connectToNode( - serviceA, - DiscoveryNodeUtils.create("C", new TransportAddress(InetAddress.getByName("localhost"), 9876), emptyMap(), emptySet()) - ); - fail("Expected ConnectTransportException"); - } catch (ConnectTransportException e) { - assertThat(e.getMessage(), containsString("connect_exception")); - assertThat(e.getMessage(), containsString("[127.0.0.1:9876]")); - Throwable cause = ExceptionsHelper.unwrap(e, IOException.class); - assertThat(cause, instanceOf(IOException.class)); - } - + final ConnectTransportException e = connectToNodeExpectFailure( + serviceA, + DiscoveryNodeUtils.create("C", new TransportAddress(InetAddress.getByName("localhost"), 9876), emptyMap(), emptySet()), + null + ); + assertThat(e.getMessage(), containsString("connect_exception")); + assertThat(e.getMessage(), containsString("[127.0.0.1:9876]")); + Throwable cause = ExceptionsHelper.unwrap(e, IOException.class); + assertThat(cause, instanceOf(IOException.class)); } @Override @@ -314,11 +313,8 @@ public boolean matches(SNIServerName sniServerName) { ); new Thread(() -> { - try { - connectToNode(serviceC, node, TestProfiles.LIGHT_PROFILE); - } catch (ConnectTransportException ex) { - // Ignore. The other side is not setup to do the ES handshake. So this will fail. - } + // noinspection ThrowableNotThrown + connectToNodeExpectFailure(serviceC, node, TestProfiles.LIGHT_PROFILE); }).start(); latch.await(); @@ -360,12 +356,10 @@ public void testInvalidSNIServerName() throws Exception { DiscoveryNodeRole.roles() ); - ConnectTransportException connectException = expectThrows( - ConnectTransportException.class, - () -> connectToNode(serviceC, node, TestProfiles.LIGHT_PROFILE) + assertThat( + connectToNodeExpectFailure(serviceC, node, TestProfiles.LIGHT_PROFILE).getMessage(), + containsString("invalid DiscoveryNode server_name [invalid_hostname]") ); - - assertThat(connectException.getMessage(), containsString("invalid DiscoveryNode server_name [invalid_hostname]")); } } } @@ -574,10 +568,7 @@ public void testClientChannelUsesSeparateSslConfigurationForRemoteCluster() thro // 1. 
Connection will fail because FC server certificate is not trusted by default final Settings qcSettings1 = Settings.builder().build(); try (MockTransportService qcService = buildService("QC", VersionInformation.CURRENT, TransportVersion.current(), qcSettings1)) { - final ConnectTransportException e = expectThrows( - ConnectTransportException.class, - () -> openConnection(qcService, node, connectionProfile) - ); + final ConnectTransportException e = openConnectionExpectFailure(qcService, node, connectionProfile); assertThat( e.getRootCause().getMessage(), anyOf(containsString("unable to find valid certification path"), containsString("Unable to find certificate chain")) @@ -897,11 +888,10 @@ public void testTcpHandshakeTimeout() throws IOException { builder.setHandshakeTimeout(TimeValue.timeValueMillis(1)); Settings settings = Settings.builder().put("xpack.security.transport.ssl.verification_mode", "none").build(); try (MockTransportService serviceC = buildService("TS_C", version0, transportVersion0, settings)) { - ConnectTransportException ex = expectThrows( - ConnectTransportException.class, - () -> connectToNode(serviceC, dummy, builder.build()) + assertEquals( + "[][" + dummy.getAddress() + "] handshake_timeout[1ms]", + connectToNodeExpectFailure(serviceC, dummy, builder.build()).getMessage() ); - assertEquals("[][" + dummy.getAddress() + "] handshake_timeout[1ms]", ex.getMessage()); } } finally { doneLatch.countDown(); @@ -934,10 +924,9 @@ public void testTlsHandshakeTimeout() throws IOException { TransportRequestOptions.Type.REG, TransportRequestOptions.Type.STATE ); - ConnectTransportException ex = expectThrows( - ConnectTransportException.class, - () -> connectToNode(serviceA, dummy, builder.build()) - ); + final var future = new TestPlainActionFuture(); + serviceA.connectToNode(dummy, builder.build(), future); + final var ex = expectThrows(ExecutionException.class, ConnectTransportException.class, future::get); // long wait assertEquals("[][" + dummy.getAddress() + "] connect_exception", ex.getMessage()); assertNotNull(ExceptionsHelper.unwrap(ex, SslHandshakeTimeoutException.class)); } finally { @@ -982,10 +971,7 @@ public void testTcpHandshakeConnectionReset() throws IOException, InterruptedExc builder.setHandshakeTimeout(TimeValue.timeValueHours(1)); Settings settings = Settings.builder().put("xpack.security.transport.ssl.verification_mode", "none").build(); try (MockTransportService serviceC = buildService("TS_C", version0, transportVersion0, settings)) { - ConnectTransportException ex = expectThrows( - ConnectTransportException.class, - () -> connectToNode(serviceC, dummy, builder.build()) - ); + ConnectTransportException ex = connectToNodeExpectFailure(serviceC, dummy, builder.build()); assertEquals("[][" + dummy.getAddress() + "] general node connection failure", ex.getMessage()); assertThat(ex.getCause().getMessage(), startsWith("handshake failed")); } From cadb3f9325900ced179d0965bac975a92f664088 Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Wed, 17 Jul 2024 08:12:52 +0100 Subject: [PATCH 08/65] Remove typo put-lifecycle.asciidoc (#110875) (#110918) --- .../data-streams/lifecycle/apis/put-lifecycle.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc b/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc index 6bd157071f54e..7d33a5b5f880c 100644 --- a/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc +++ 
b/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc @@ -54,7 +54,7 @@ duration the document could be deleted. When empty, every document in this data `enabled`:: (Optional, boolean) -If defined, it turns data streqm lifecycle on/off (`true`/`false`) for this data stream. +If defined, it turns data stream lifecycle on/off (`true`/`false`) for this data stream. A data stream lifecycle that's disabled (`enabled: false`) will have no effect on the data stream. Defaults to `true`. From 61bf77ef45ecf45fff148f67245fc49aa397d0f0 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 17 Jul 2024 17:18:17 +1000 Subject: [PATCH 09/65] [test] Improve failure mode of NodeConnectionsServiceTests (#110956) Currently, when NodeConnectionsServiceTests #testOnlyBlocksOnConnectionsToNewNodes fails due to an exception, it resets a CyclicBarrier which causes an AssertionError to be thrown on a connector thread. The AssertionError ends up being uncaught, so when we try to stop the TransportService in the tear-down it blocks indefinitely waiting for that connector thread to complete. This causes the original exception to be obscured. This change makes the connector thread fail more gracefully so that the test can complete and the root cause will be revealed. --- .../cluster/NodeConnectionsServiceTests.java | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java index 4f7d8ddb2d9d0..4f1c5b7fa5dc5 100644 --- a/server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java @@ -596,29 +596,25 @@ public TransportAddress[] addressesFromString(String address) { return new TransportAddress[0]; } - private void runConnectionBlock(CheckedRunnable connectionBlock) { + private void runConnectionBlock(CheckedRunnable connectionBlock) throws Exception { if (connectionBlock == null) { return; } - try { - connectionBlock.run(); - } catch (Exception e) { - throw new AssertionError(e); - } + connectionBlock.run(); } @Override public void openConnection(DiscoveryNode node, ConnectionProfile profile, ActionListener listener) { final CheckedRunnable connectionBlock = nodeConnectionBlocks.get(node); if (profile == null && randomConnectionExceptions && randomBoolean()) { - threadPool.generic().execute(() -> { + threadPool.generic().execute(() -> ActionListener.completeWith(listener, () -> { runConnectionBlock(connectionBlock); - listener.onFailure(new ConnectTransportException(node, "simulated")); - }); + throw new ConnectTransportException(node, "simulated"); + })); } else { - threadPool.generic().execute(() -> { + threadPool.generic().execute(() -> ActionListener.completeWith(listener, () -> { runConnectionBlock(connectionBlock); - listener.onResponse(new Connection() { + return new Connection() { private final SubscribableListener closeListener = new SubscribableListener<>(); private final SubscribableListener removedListener = new SubscribableListener<>(); @@ -682,8 +678,8 @@ public boolean decRef() { public boolean hasReferences() { return refCounted.hasReferences(); } - }); - }); + }; + })); } } From 54ed2ec4ccdf3a66b7ea981e43631566da2b28b5 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 17 Jul 2024 09:05:40 +0100 Subject: [PATCH 10/65] Minor simplification to get-snapshots action (#110959) Inlines some methods that
are now only used in one place. --- .../get/TransportGetSnapshotsAction.java | 101 ++++++++---------- 1 file changed, 46 insertions(+), 55 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java index 84f98e36f2a37..d36cf7bf08b1f 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java @@ -255,7 +255,15 @@ void getMultipleReposSnapshotInfo(ActionListener listener) } SubscribableListener.newForked(l -> maybeGetRepositoryData(repoName, l)) - .andThen((l, repositoryData) -> loadSnapshotInfos(repoName, repositoryData, l)) + .andThen((repositoryListener, repositoryData) -> { + assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); + cancellableTask.ensureNotCancelled(); + ensureRequiredNamesPresent(repoName, repositoryData); + loadSnapshotInfos( + getAsyncSnapshotInfoIterator(repositoriesService.repository(repoName), repositoryData), + repositoryListener + ); + }) .addListener(listeners.acquire()); } } @@ -281,14 +289,6 @@ private boolean skipRepository(String repositoryName) { } } - private void loadSnapshotInfos(String repositoryName, @Nullable RepositoryData repositoryData, ActionListener listener) { - assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); - cancellableTask.ensureNotCancelled(); - final var repository = repositoriesService.repository(repositoryName); - ensureRequiredNamesPresent(repositoryName, repositoryData); - loadSnapshotInfos(getAsyncSnapshotInfoIterator(repository, repositoryData), listener); - } - /** * Check that the repository contains every required name according to {@link #snapshotNamePredicate}. 
* @@ -459,54 +459,45 @@ private void loadSnapshotInfos(Iterator asyncSnapshotInfoIter final List snapshots = new ArrayList<>(); final List syncSnapshots = Collections.synchronizedList(snapshots); - SubscribableListener - - .newForked(l -> { - try (var listeners = new RefCountingListener(l)) { - ThrottledIterator.run( - Iterators.failFast(asyncSnapshotInfoIterator, () -> cancellableTask.isCancelled() || listeners.isFailing()), - (ref, asyncSnapshotInfo) -> { - final var refListener = ActionListener.runBefore(listeners.acquire(), ref::close); - asyncSnapshotInfo.getSnapshotInfo(new ActionListener<>() { - @Override - public void onResponse(SnapshotInfo snapshotInfo) { - if (matchesPredicates(snapshotInfo)) { - repositoryTotalCount.incrementAndGet(); - if (afterPredicate.test(snapshotInfo)) { - syncSnapshots.add(snapshotInfo.maybeWithoutIndices(indices)); - } - } - refListener.onResponse(null); - } - - @Override - public void onFailure(Exception e) { - if (ignoreUnavailable) { - logger.warn(Strings.format("failed to fetch snapshot info for [%s]", asyncSnapshotInfo), e); - refListener.onResponse(null); - } else { - refListener.onFailure(e); - } + try (var listeners = new RefCountingListener(listener)) { + final var iterationCompleteListener = listeners.acquire(ignored -> { + totalCount.addAndGet(repositoryTotalCount.get()); + // no need to synchronize access to snapshots: all writes happen-before this read + resultsCount.addAndGet(snapshots.size()); + allSnapshotInfos.add(snapshots); + }); + ThrottledIterator.run( + Iterators.failFast(asyncSnapshotInfoIterator, () -> cancellableTask.isCancelled() || listeners.isFailing()), + (ref, asyncSnapshotInfo) -> { + final var refListener = ActionListener.runBefore(listeners.acquire(), ref::close); + asyncSnapshotInfo.getSnapshotInfo(new ActionListener<>() { + @Override + public void onResponse(SnapshotInfo snapshotInfo) { + if (matchesPredicates(snapshotInfo)) { + repositoryTotalCount.incrementAndGet(); + if (afterPredicate.test(snapshotInfo)) { + syncSnapshots.add(snapshotInfo.maybeWithoutIndices(indices)); } - }); - }, - getSnapshotInfoExecutor.getMaxRunningTasks(), - () -> {}, - () -> {} - ); - } - }) - - // no need to synchronize access to snapshots: all writes happen-before this read - .andThenAccept(ignored -> addResults(repositoryTotalCount.get(), snapshots)) - - .addListener(listener); - } + } + refListener.onResponse(null); + } - private void addResults(int repositoryTotalCount, List snapshots) { - totalCount.addAndGet(repositoryTotalCount); - resultsCount.addAndGet(snapshots.size()); - allSnapshotInfos.add(snapshots); + @Override + public void onFailure(Exception e) { + if (ignoreUnavailable) { + logger.warn(Strings.format("failed to fetch snapshot info for [%s]", asyncSnapshotInfo), e); + refListener.onResponse(null); + } else { + refListener.onFailure(e); + } + } + }); + }, + getSnapshotInfoExecutor.getMaxRunningTasks(), + () -> {}, + () -> iterationCompleteListener.onResponse(null) + ); + } } private GetSnapshotsResponse buildResponse() { From 2ef42f4c1d483f613431869416025618b61ae73c Mon Sep 17 00:00:00 2001 From: Tim Grein Date: Wed, 17 Jul 2024 10:10:55 +0200 Subject: [PATCH 11/65] [Inference API] Align method names in InferencePlugin#getSettings (#110908) --- .../elasticsearch/xpack/inference/InferencePlugin.java | 8 ++++---- .../elasticsearch/xpack/inference/common/Truncator.java | 2 +- .../xpack/inference/external/http/HttpClientManager.java | 2 +- .../xpack/inference/external/http/HttpSettings.java | 2 +- 
.../xpack/inference/logging/ThrottlerManager.java | 2 +- .../java/org/elasticsearch/xpack/inference/Utils.java | 8 ++++---- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index 6afa4abca1a67..fce2c54c535c9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -276,11 +276,11 @@ public List> getExecutorBuilders(Settings settingsToUse) { @Override public List> getSettings() { return Stream.of( - HttpSettings.getSettings(), - HttpClientManager.getSettings(), - ThrottlerManager.getSettings(), + HttpSettings.getSettingsDefinitions(), + HttpClientManager.getSettingsDefinitions(), + ThrottlerManager.getSettingsDefinitions(), RetrySettings.getSettingsDefinitions(), - Truncator.getSettings(), + Truncator.getSettingsDefinitions(), RequestExecutorServiceSettings.getSettingsDefinitions(), List.of(SKIP_VALIDATE_AND_START) ).flatMap(Collection::stream).collect(Collectors.toList()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/common/Truncator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/common/Truncator.java index eabed7f6a7bd3..45ab9b160a8e6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/common/Truncator.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/common/Truncator.java @@ -34,7 +34,7 @@ public class Truncator { Setting.Property.Dynamic ); - public static List> getSettings() { + public static List> getSettingsDefinitions() { return List.of(REDUCTION_PERCENTAGE_SETTING); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/HttpClientManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/HttpClientManager.java index 8be3b76f68c54..e5d76b9bb5570 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/HttpClientManager.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/HttpClientManager.java @@ -153,7 +153,7 @@ private IdleConnectionEvictor createConnectionEvictor() { return new IdleConnectionEvictor(threadPool, connectionManager, evictionInterval, connectionMaxIdle); } - public static List> getSettings() { + public static List> getSettingsDefinitions() { return List.of( MAX_TOTAL_CONNECTIONS, MAX_ROUTE_CONNECTIONS, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/HttpSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/HttpSettings.java index 642b76d775173..b2825d1b79cbf 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/HttpSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/HttpSettings.java @@ -45,7 +45,7 @@ private void setMaxResponseSize(ByteSizeValue maxResponseSize) { this.maxResponseSize = maxResponseSize; } - public static List> getSettings() { + public static List> getSettingsDefinitions() { return List.of(MAX_HTTP_RESPONSE_SIZE); } } diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/logging/ThrottlerManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/logging/ThrottlerManager.java index 2a84494d6af21..d333cc92d61de 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/logging/ThrottlerManager.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/logging/ThrottlerManager.java @@ -102,7 +102,7 @@ public void close() { throttler.close(); } - public static List> getSettings() { + public static List> getSettingsDefinitions() { return List.of(STATS_RESET_INTERVAL_SETTING, LOGGER_WAIT_DURATION_SETTING); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/Utils.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/Utils.java index fe33a3d092667..fb841bd6953cb 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/Utils.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/Utils.java @@ -65,11 +65,11 @@ public static ClusterService mockClusterService(Settings settings) { var clusterService = mock(ClusterService.class); var registeredSettings = Stream.of( - HttpSettings.getSettings(), - HttpClientManager.getSettings(), - ThrottlerManager.getSettings(), + HttpSettings.getSettingsDefinitions(), + HttpClientManager.getSettingsDefinitions(), + ThrottlerManager.getSettingsDefinitions(), RetrySettings.getSettingsDefinitions(), - Truncator.getSettings(), + Truncator.getSettingsDefinitions(), RequestExecutorServiceSettings.getSettingsDefinitions() ).flatMap(Collection::stream).collect(Collectors.toSet()); From da5392134fcb806198921b0b25620141ce6a4798 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 17 Jul 2024 11:39:02 +0200 Subject: [PATCH 12/65] ESQL: Validate unique plan attribute names (#110488) * Enforce an invariant in our dependency checker so that logical plans never have duplicate output attribute names or ids. * Fix ROW to not produce columns with duplicate names. * Fix ResolveUnionTypes to not create multiple synthetic field attributes for the same union type. * Add tests for commands using the same column name more than once. * Update docs w.r.t. how commands behave if they are used with duplicate column names. 
--- docs/changelog/110488.yaml | 6 ++ .../esql/processing-commands/dissect.asciidoc | 2 + .../esql/processing-commands/enrich.asciidoc | 7 +- .../esql/processing-commands/eval.asciidoc | 4 +- .../esql/processing-commands/grok.asciidoc | 15 ++++ .../esql/processing-commands/keep.asciidoc | 4 +- .../esql/processing-commands/lookup.asciidoc | 1 + .../esql/processing-commands/rename.asciidoc | 4 +- .../esql/processing-commands/stats.asciidoc | 3 + .../esql/source-commands/row.asciidoc | 1 + .../xpack/esql/CsvTestsDataLoader.java | 5 +- .../src/main/resources/addresses.csv | 4 ++ .../src/main/resources/dissect.csv-spec | 25 +++++++ .../src/main/resources/docs.csv-spec | 17 +++++ .../src/main/resources/drop.csv-spec | 50 ++++++++++++++ .../src/main/resources/enrich.csv-spec | 68 +++++++++++++++++++ .../src/main/resources/eval.csv-spec | 23 +++++++ .../src/main/resources/grok.csv-spec | 25 +++++++ .../src/main/resources/keep.csv-spec | 60 ++++++++++++++++ .../src/main/resources/mapping-addresses.json | 44 ++++++++++++ .../src/main/resources/rename.csv-spec | 39 +++++++++++ .../src/main/resources/row.csv-spec | 23 ++++++- .../src/main/resources/stats.csv-spec | 33 +++++++++ .../xpack/esql/action/EsqlCapabilities.java | 12 +++- .../xpack/esql/analysis/Analyzer.java | 24 +++++-- .../xpack/esql/analysis/AnalyzerRules.java | 12 +--- .../xpack/esql/optimizer/OptimizerRules.java | 22 +++++- .../xpack/esql/parser/LogicalPlanBuilder.java | 4 +- .../xpack/esql/plan/logical/Rename.java | 7 ++ 29 files changed, 516 insertions(+), 28 deletions(-) create mode 100644 docs/changelog/110488.yaml create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json diff --git a/docs/changelog/110488.yaml b/docs/changelog/110488.yaml new file mode 100644 index 0000000000000..fbb439f20fc96 --- /dev/null +++ b/docs/changelog/110488.yaml @@ -0,0 +1,6 @@ +pr: 110488 +summary: "ESQL: Validate unique plan attribute names" +area: ES|QL +type: bug +issues: + - 110541 diff --git a/docs/reference/esql/processing-commands/dissect.asciidoc b/docs/reference/esql/processing-commands/dissect.asciidoc index 72c811a318a5d..82138aa238087 100644 --- a/docs/reference/esql/processing-commands/dissect.asciidoc +++ b/docs/reference/esql/processing-commands/dissect.asciidoc @@ -20,6 +20,8 @@ multiple values, `DISSECT` will process each value. `pattern`:: A <>. +If a field name conflicts with an existing column, the existing column is dropped. +If a field name is used more than once, only the rightmost duplicate creates a column. ``:: A string used as the separator between appended values, when using the <>. diff --git a/docs/reference/esql/processing-commands/enrich.asciidoc b/docs/reference/esql/processing-commands/enrich.asciidoc index f34e77dbf5c23..2ece5a63e7570 100644 --- a/docs/reference/esql/processing-commands/enrich.asciidoc +++ b/docs/reference/esql/processing-commands/enrich.asciidoc @@ -31,11 +31,16 @@ name as the `match_field` defined in the <>. The enrich fields from the enrich index that are added to the result as new columns. If a column with the same name as the enrich field already exists, the existing column will be replaced by the new column. If not specified, each of -the enrich fields defined in the policy is added +the enrich fields defined in the policy is added. +A column with the same name as the enrich field will be dropped unless the +enrich field is renamed. 
`new_nameX`:: Enables you to change the name of the column that's added for each of the enrich fields. Defaults to the enrich field name. +If a column has the same name as the new name, it will be discarded. +If a name (new or original) occurs more than once, only the rightmost duplicate +creates a new column. *Description* diff --git a/docs/reference/esql/processing-commands/eval.asciidoc b/docs/reference/esql/processing-commands/eval.asciidoc index f77249736c1b3..00a7764d24004 100644 --- a/docs/reference/esql/processing-commands/eval.asciidoc +++ b/docs/reference/esql/processing-commands/eval.asciidoc @@ -16,10 +16,12 @@ EVAL [column1 =] value1[, ..., [columnN =] valueN] `columnX`:: The column name. +If a column with the same name already exists, the existing column is dropped. +If a column name is used more than once, only the rightmost duplicate creates a column. `valueX`:: The value for the column. Can be a literal, an expression, or a -<>. +<>. Can use columns defined left of this one. *Description* diff --git a/docs/reference/esql/processing-commands/grok.asciidoc b/docs/reference/esql/processing-commands/grok.asciidoc index d631d17f7a42c..57c55a5bad53f 100644 --- a/docs/reference/esql/processing-commands/grok.asciidoc +++ b/docs/reference/esql/processing-commands/grok.asciidoc @@ -20,6 +20,9 @@ multiple values, `GROK` will process each value. `pattern`:: A grok pattern. +If a field name conflicts with an existing column, the existing column is discarded. +If a field name is used more than once, a multi-valued column will be created with one value +per each occurrence of the field name. *Description* @@ -67,4 +70,16 @@ include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime] |=== include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime-result] |=== + +If a field name is used more than once, `GROK` creates a multi-valued +column: + +[source.merge.styled,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames-result] +|=== // end::examples[] diff --git a/docs/reference/esql/processing-commands/keep.asciidoc b/docs/reference/esql/processing-commands/keep.asciidoc index 468f459411640..3dbd0c69d8222 100644 --- a/docs/reference/esql/processing-commands/keep.asciidoc +++ b/docs/reference/esql/processing-commands/keep.asciidoc @@ -16,6 +16,8 @@ KEEP columns `columns`:: A comma-separated list of columns to keep. Supports wildcards. +See below for the behavior in case an existing column matches multiple +given wildcards or column names. *Description* @@ -29,7 +31,7 @@ Fields are added in the order they appear. If one field matches multiple express 2. Partial wildcard expressions (for example: `fieldNam*`) 3. Wildcard only (`*`) -If a field matches two expressions with the same precedence, the right-most expression wins. +If a field matches two expressions with the same precedence, the rightmost expression wins. Refer to the examples for illustrations of these precedence rules. diff --git a/docs/reference/esql/processing-commands/lookup.asciidoc b/docs/reference/esql/processing-commands/lookup.asciidoc index 426527bf4d2d6..7bb3a5791deef 100644 --- a/docs/reference/esql/processing-commands/lookup.asciidoc +++ b/docs/reference/esql/processing-commands/lookup.asciidoc @@ -18,6 +18,7 @@ LOOKUP table ON match_field1[, match_field2, ...] `table`:: The name of the `table` provided in the request to match. 
+If the table's column names conflict with existing columns, the existing columns will be dropped. `match_field`:: The fields in the input to match against the table. diff --git a/docs/reference/esql/processing-commands/rename.asciidoc b/docs/reference/esql/processing-commands/rename.asciidoc index 8507a826f085d..41e2ce9298ae8 100644 --- a/docs/reference/esql/processing-commands/rename.asciidoc +++ b/docs/reference/esql/processing-commands/rename.asciidoc @@ -17,7 +17,9 @@ RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN] The name of a column you want to rename. `new_nameX`:: -The new name of the column. +The new name of the column. If it conflicts with an existing column name, +the existing column is dropped. If multiple columns are renamed to the same +name, all but the rightmost column with the same new name are dropped. *Description* diff --git a/docs/reference/esql/processing-commands/stats.asciidoc b/docs/reference/esql/processing-commands/stats.asciidoc index 34ae81fd5414e..7377522a93201 100644 --- a/docs/reference/esql/processing-commands/stats.asciidoc +++ b/docs/reference/esql/processing-commands/stats.asciidoc @@ -18,12 +18,15 @@ STATS [column1 =] expression1[, ..., [columnN =] expressionN] `columnX`:: The name by which the aggregated value is returned. If omitted, the name is equal to the corresponding expression (`expressionX`). +If multiple columns have the same name, all but the rightmost column with this +name will be ignored. `expressionX`:: An expression that computes an aggregated value. `grouping_expressionX`:: An expression that outputs the values to group by. +If its name coincides with one of the computed columns, that column will be ignored. NOTE: Individual `null` values are skipped when computing aggregations. diff --git a/docs/reference/esql/source-commands/row.asciidoc b/docs/reference/esql/source-commands/row.asciidoc index 5c81d67c4ac22..28a4f29ae9a5b 100644 --- a/docs/reference/esql/source-commands/row.asciidoc +++ b/docs/reference/esql/source-commands/row.asciidoc @@ -16,6 +16,7 @@ ROW column1 = value1[, ..., columnN = valueN] `columnX`:: The column name. +In case of duplicate column names, only the rightmost duplicate creates a column. `valueX`:: The value for the column. 
Can be a literal, an expression, or a diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 530b2bc01b3d6..f9b768d67d574 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -96,8 +96,8 @@ public class CsvTestsDataLoader { "cartesian_multipolygons.csv" ); private static final TestsDataset DISTANCES = new TestsDataset("distances", "mapping-distances.json", "distances.csv"); - private static final TestsDataset K8S = new TestsDataset("k8s", "k8s-mappings.json", "k8s.csv", "k8s-settings.json", true); + private static final TestsDataset ADDRESSES = new TestsDataset("addresses", "mapping-addresses.json", "addresses.csv", null, true); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), @@ -121,7 +121,8 @@ public class CsvTestsDataLoader { Map.entry(AIRPORT_CITY_BOUNDARIES.indexName, AIRPORT_CITY_BOUNDARIES), Map.entry(CARTESIAN_MULTIPOLYGONS.indexName, CARTESIAN_MULTIPOLYGONS), Map.entry(K8S.indexName, K8S), - Map.entry(DISTANCES.indexName, DISTANCES) + Map.entry(DISTANCES.indexName, DISTANCES), + Map.entry(ADDRESSES.indexName, ADDRESSES) ); private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv new file mode 100644 index 0000000000000..0eea102400d60 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv @@ -0,0 +1,4 @@ +street:keyword,number:keyword,zip_code:keyword,city.name:keyword,city.country.name:keyword,city.country.continent.name:keyword,city.country.continent.planet.name:keyword,city.country.continent.planet.galaxy:keyword +Keizersgracht,281,1016 ED,Amsterdam,Netherlands,Europe,Earth,Milky Way +Kearny St,88,CA 94108,San Francisco,United States of America,North America,Earth,Milky Way +Marunouchi,2-7-2,100-7014,Tokyo,Japan,Asia,Earth,Milky Way diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 812198c324217..8c4e797b7982d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -26,6 +26,19 @@ first_name:keyword | left:keyword | full_name:keyword | right:keyword | last_nam Georgi | left | Georgi Facello | right | Facello ; +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| DISSECT city.name "%{city.country.continent.planet.name} %{?}" +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | null +United States of America | San Francisco | San +Japan | Tokyo | null +; + shadowingSelf FROM employees | KEEP first_name, last_name @@ -50,6 +63,18 @@ last_name:keyword | left:keyword | foo:keyword | middle:keyword | ri Facello | left | Georgi1 Georgi2 Facello | middle | right | Georgi1 | Georgi2 | Facello ; +shadowingInternal +FROM employees +| KEEP first_name, last_name +| WHERE last_name == "Facello" +| 
EVAL name = concat(first_name, "1 ", last_name) +| DISSECT name "%{foo} %{foo}" +; + +first_name:keyword | last_name:keyword | name:keyword | foo:keyword +Georgi | Facello | Georgi1 Facello | Facello +; + complexPattern ROW a = "1953-01-23T12:15:00Z - some text - 127.0.0.1;" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index d34620a9e118d..15fe6853ae491 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -436,6 +436,23 @@ ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected" // end::grokWithEscape-result[] ; +grokWithDuplicateFieldNames +// tag::grokWithDuplicateFieldNames[] +FROM addresses +| KEEP city.name, zip_code +| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +// end::grokWithDuplicateFieldNames[] +| SORT city.name +; + +// tag::grokWithDuplicateFieldNames-result[] +city.name:keyword | zip_code:keyword | zip_parts:keyword +Amsterdam | 1016 ED | ["1016", "ED"] +San Francisco | CA 94108 | ["CA", "94108"] +Tokyo | 100-7014 | null +// end::grokWithDuplicateFieldNames-result[] +; + basicDissect // tag::basicDissect[] ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec index 35530cf6fdb8e..9886d6cce0ca2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec @@ -122,3 +122,53 @@ FROM employees | STATS COUNT(*), MIN(salary * 10), MAX(languages)| DROP `COUNT( MIN(salary * 10):i | MAX(languages):i 253240 | 5 ; + +// Not really shadowing, but let's keep the name consistent with the other command's tests +shadowingInternal +FROM employees +| SORT emp_no ASC +| KEEP emp_no, first_name, last_name +| DROP last_name, last_name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +shadowingInternalWildcard +FROM employees +| SORT emp_no ASC +| KEEP emp_no, first_name, last_name +| DROP last*name, last*name, last*, last_name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +subfields +FROM addresses +| DROP city.country.continent.planet.name, city.country.continent.name, city.country.name, number, street, zip_code, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.planet.galaxy:keyword | city.name:keyword +Milky Way | Amsterdam +Milky Way | San Francisco +Milky Way | Tokyo +; + +subfieldsWildcard +FROM addresses +| DROP *.name, number, street, zip_code, *ame +; + +city.country.continent.planet.galaxy:keyword +Milky Way +Milky Way +Milky Way +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index fc8c48afdf8cc..cf32e028b23bc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -69,6 +69,34 @@ ROW left = "left", foo = "foo", client_ip = "172.21.0.5", env = "env", right = " left:keyword | client_ip:keyword | env:keyword | right:keyword | foo:keyword ; +shadowingSubfields +required_capability: enrich_load +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.name 
= REPLACE(city.name, "San Francisco", "South San Francisco") +| ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:text +Netherlands | Amsterdam | null +United States of America | South San Francisco | San Francisco Int'l +Japan | Tokyo | null +; + +shadowingSubfieldsLimit0 +required_capability: enrich_load +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") +| ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport +| SORT city.name +| LIMIT 0 +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:text +; + shadowingSelf required_capability: enrich_load ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" @@ -107,6 +135,46 @@ ROW left = "left", airport = "Zurich Airport ZRH", city = "Zürich", middle = "m left:keyword | city:keyword | middle:keyword | right:keyword | airport:text | region:text | city_boundary:geo_shape ; +shadowingInternal +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH x = airport, x = region +; + +city:keyword | x:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport = region +; + +city:keyword | airport:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit2 +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport, airport = region +; + +city:keyword | airport:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit3 +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport = region, airport +; + +city:keyword | airport:text +Zürich | Zurich Int'l +; + simple required_capability: enrich_load diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec index 3df3b85e5e3af..87f54fbf0f174 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec @@ -15,6 +15,19 @@ left:keyword | right:keyword | x:integer left | right | 1 ; +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.country.continent.planet.name = to_upper(city.country.continent.planet.name) +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | EARTH +United States of America | San Francisco | EARTH +Japan | Tokyo | EARTH +; + shadowingSelf ROW left = "left", x = 10000 , right = "right" | EVAL x = x + 1 @@ -33,6 +46,16 @@ left:keyword | middle:keyword | right:keyword | x:integer | y:integer left | middle | right | 9 | 10 ; +shadowingInternal +ROW x = 10000 +| EVAL x = x + 1, x = x - 2 +; + +x:integer +9999 +; + + withMath row a = 1 | eval b = 2 + 3; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index 9d574eed7be6b..d9857e8c122ef 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -26,6 +26,19 @@ first_name:keyword 
| left:keyword | full_name:keyword | right:keyword | last_nam Georgi | left | Georgi Facello | right | Facello ; +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| GROK city.name "%{WORD:city.country.continent.planet.name} %{WORD}" +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | null +United States of America | San Francisco | San +Japan | Tokyo | null +; + shadowingSelf FROM employees | KEEP first_name, last_name @@ -50,6 +63,18 @@ last_name:keyword | left:keyword | foo:keyword | middle:keyword | ri Facello | left | Georgi1 Georgi2 Facello | middle | right | Georgi1 | Georgi2 | Facello ; +shadowingInternal +FROM addresses +| KEEP city.name, zip_code +| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +| SORT city.name +; + +city.name:keyword | zip_code:keyword | zip_parts:keyword +Amsterdam | 1016 ED | ["1016", "ED"] +San Francisco | CA 94108 | ["CA", "94108"] +Tokyo | 100-7014 | null +; complexPattern ROW a = "1953-01-23T12:15:00Z 127.0.0.1 some.email@foo.com 42" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec index 14a3807b8729c..bcce35eb81e0f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec @@ -539,3 +539,63 @@ c:i 1 1 ; + +shadowingInternal +FROM employees +| SORT emp_no ASC +| KEEP last_name, emp_no, last_name +| LIMIT 2 +; + +emp_no:integer | last_name:keyword + 10001 | Facello + 10002 | Simmel +; + +shadowingInternalWildcard +FROM employees +| SORT emp_no ASC +| KEEP last*name, emp_no, last*name, first_name, last*, gender, last* +| LIMIT 2 +; + +emp_no:integer | first_name:keyword | gender:keyword | last_name:keyword + 10001 | Georgi | M | Facello + 10002 | Bezalel | F | Simmel +; + +shadowingInternalWildcardAndExplicit +FROM employees +| SORT emp_no ASC +| KEEP last*name, emp_no, last_name, first_name, last*, languages, last_name, gender, last*name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword | languages:integer | last_name:keyword | gender:keyword + 10001 | Georgi | 2 | Facello | M + 10002 | Bezalel | 5 | Simmel | F +; + +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.name:keyword | city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Europe | Netherlands | Amsterdam | Earth +North America | United States of America | San Francisco | Earth +Asia | Japan | Tokyo | Earth +; + +shadowingSubfieldsWildcard +FROM addresses +| KEEP *name, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.name:keyword | city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Europe | Netherlands | Amsterdam | Earth +North America | United States of America | San Francisco | Earth +Asia | Japan | Tokyo | Earth +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json new file mode 100644 index 0000000000000..679efb3c8d38b --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json @@ -0,0 +1,44 @@ +{ + "properties" 
: { + "street" : { + "type": "keyword" + }, + "number" : { + "type": "keyword" + }, + "zip_code": { + "type": "keyword" + }, + "city" : { + "properties": { + "name": { + "type": "keyword" + }, + "country": { + "properties": { + "name": { + "type": "keyword" + }, + "continent": { + "properties": { + "name": { + "type": "keyword" + }, + "planet": { + "properties": { + "name": { + "type": "keyword" + }, + "galaxy": { + "type": "keyword" + } + } + } + } + } + } + } + } + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec index 1e830486cc7c7..ca4c627cae749 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec @@ -174,3 +174,42 @@ avg_worked_seconds:l | birth_date:date | emp_no:i | first_n 341158890 | 1961-10-15T00:00:00.000Z | 10060 | Breannda | M | 1.42 | 1.4199999570846558 | 1.419921875 | 1.42 | 1987-11-02T00:00:00.000Z | [false, false, false, true]| [Business Analyst, Data Scientist, Senior Team Lead] | 2 | 2 | 2 | 2 | Billingsley | 29175 | [-1.76, -0.85] | [-1, 0] | [-0.85, -1.76] | [-1, 0] | true | 29175 246355863 | null | 10042 | Magy | F | 1.44 | 1.440000057220459 | 1.4404296875 | 1.44 | 1993-03-21T00:00:00.000Z | null | [Architect, Business Analyst, Internship, Junior Developer] | 3 | 3 | 3 | 3 | Stamatiou | 30404 | [-9.28, 9.42] | [-9, 9] | [-9.28, 9.42] | [-9, 9] | true | 30404 ; + +shadowing +FROM employees +| SORT emp_no ASC +| KEEP emp_no, first_name, last_name +| RENAME emp_no AS last_name +| LIMIT 2 +; + +last_name:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name +| RENAME city.name AS city.country.continent.planet.name, city.country.name AS city.country.continent.name +| SORT city.country.continent.planet.name +; + +city.country.continent.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam +United States of America | San Francisco +Japan | Tokyo +; + +shadowingInternal +FROM employees +| SORT emp_no ASC +| KEEP emp_no, last_name +| RENAME emp_no AS x, last_name AS x +| LIMIT 2 +; + +x:keyword +Facello +Simmel +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index bb1cf7358ca74..da640b6306299 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -36,6 +36,24 @@ a:integer // end::multivalue-result[] ; +shadowingInternal +required_capability: unique_names +ROW a = 1, a = 2; + +a:integer + 2 +; + +shadowingInternalSubfields +required_capability: unique_names +// Fun fact: "Sissi" is an actual exoplanet name, after the character from the movie with the same name. A.k.a. HAT-P-14 b. 
+ROW city.country.continent.planet.name = "Earth", city.country.continent.name = "Netherlands", city.country.continent.planet.name = "Sissi" +; + +city.country.continent.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Sissi +; + unsignedLongLiteral ROW long_max = 9223372036854775807, ul_start = 9223372036854775808, ul_end = 18446744073709551615, double=18446744073709551616; @@ -70,10 +88,11 @@ a:integer | b:integer | c:null | z:integer ; evalRowWithNull2 +required_capability: unique_names row a = 1, null, b = 2, c = null, null | eval z = a+b; -a:integer | null:null | b:integer | c:null | null:null | z:integer -1 | null | 2 | null | null | 3 +a:integer | b:integer | c:null | null:null | z:integer + 1 | 2 | null | null | 3 ; evalRowWithNull3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 596e671679eb3..b64dcf7bf5ca4 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -1886,6 +1886,39 @@ w_avg:double null ; +shadowingInternal +FROM employees +| STATS x = MAX(emp_no), x = MIN(emp_no) +; + +x:integer +10001 +; + +shadowingInternalWithGroup +FROM employees +| STATS x = MAX(emp_no), x = MIN(emp_no) BY x = gender +| SORT x ASC +; + +x:keyword +F +M +null +; + +shadowingTheGroup +FROM employees +| STATS gender = MAX(emp_no), gender = MIN(emp_no) BY gender +| SORT gender ASC +; + +gender:keyword +F +M +null +; + docsStatsMvGroup // tag::mv-group[] ROW i=1, a=["a", "b"] | STATS MIN(i) BY a | SORT a ASC diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index be08d61ab8d6c..98c6d8f4332be 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -152,14 +152,20 @@ public enum Cap { FIX_COUNT_DISTINCT_SOURCE_ERROR, /** - * Use RangeQuery for BinaryComparison on DateTime fields. - * */ + * Use RangeQuery for BinaryComparison on DateTime fields. + */ RANGEQUERY_FOR_DATETIME, /** * Add tests for #105383, STATS BY constant. */ - STATS_BY_CONSTANT; + STATS_BY_CONSTANT, + + /** + * Fix for non-unique attribute names in ROW and logical plans. + * https://github.com/elastic/elasticsearch/issues/110541 + */ + UNIQUE_NAMES; private final boolean snapshotOnly; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index cc12d3730f495..a691f88f29f99 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1068,13 +1068,29 @@ public static Expression castStringLiteral(Expression from, DataType target) { * Any fields which could not be resolved by conversion functions will be converted to UnresolvedAttribute instances in a later rule * (See UnresolveUnionTypes below). 
*/ - private static class ResolveUnionTypes extends BaseAnalyzerRule { + private static class ResolveUnionTypes extends Rule { record TypeResolutionKey(String fieldName, DataType fieldType) {} + private List unionFieldAttributes; + @Override - protected LogicalPlan doRule(LogicalPlan plan) { - List unionFieldAttributes = new ArrayList<>(); + public LogicalPlan apply(LogicalPlan plan) { + unionFieldAttributes = new ArrayList<>(); + // Collect field attributes from previous runs + plan.forEachUp(EsRelation.class, rel -> { + for (Attribute attr : rel.output()) { + if (attr instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField) { + unionFieldAttributes.add(fa); + } + } + }); + + return plan.transformUp(LogicalPlan.class, p -> p.resolved() || p.childrenResolved() == false ? p : doRule(p)); + } + + private LogicalPlan doRule(LogicalPlan plan) { + int alreadyAddedUnionFieldAttributes = unionFieldAttributes.size(); // See if the eval function has an unresolved MultiTypeEsField field // Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge) plan = plan.transformExpressionsOnly( @@ -1082,7 +1098,7 @@ protected LogicalPlan doRule(LogicalPlan plan) { convert -> resolveConvertFunction(convert, unionFieldAttributes) ); // If no union fields were generated, return the plan as is - if (unionFieldAttributes.isEmpty()) { + if (unionFieldAttributes.size() == alreadyAddedUnionFieldAttributes) { return plan; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java index 3314129fae405..242c947e56de9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java @@ -20,8 +20,6 @@ import java.util.function.Predicate; import java.util.function.Supplier; -import static java.util.Collections.singletonList; - public final class AnalyzerRules { public abstract static class AnalyzerRule extends Rule { @@ -138,14 +136,6 @@ public static List maybeResolveAgainstList( ) .toList(); - return singletonList( - ua.withUnresolvedMessage( - "Reference [" - + ua.qualifiedName() - + "] is ambiguous (to disambiguate use quotes or qualifiers); " - + "matches any of " - + refs - ) - ); + throw new IllegalStateException("Reference [" + ua.qualifiedName() + "] is ambiguous; " + "matches any of " + refs); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java index bff76fb1a706e..d9141d737c949 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java @@ -8,8 +8,10 @@ package org.elasticsearch.xpack.esql.optimizer; import org.elasticsearch.xpack.esql.common.Failures; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.plan.QueryPlan; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -36,6 +38,9 @@ import 
org.elasticsearch.xpack.esql.plan.physical.RowExec; import org.elasticsearch.xpack.esql.plan.physical.ShowExec; +import java.util.HashSet; +import java.util.Set; + import static org.elasticsearch.xpack.esql.common.Failure.fail; class OptimizerRules { @@ -49,9 +54,24 @@ void checkPlan(P p, Failures failures) { AttributeSet input = p.inputSet(); AttributeSet generated = generates(p); AttributeSet missing = refs.subtract(input).subtract(generated); - if (missing.size() > 0) { + if (missing.isEmpty() == false) { failures.add(fail(p, "Plan [{}] optimized incorrectly due to missing references {}", p.nodeString(), missing)); } + + Set outputAttributeNames = new HashSet<>(); + Set outputAttributeIds = new HashSet<>(); + for (Attribute outputAttr : p.output()) { + if (outputAttributeNames.add(outputAttr.name()) == false || outputAttributeIds.add(outputAttr.id()) == false) { + failures.add( + fail( + p, + "Plan [{}] optimized incorrectly due to duplicate output attribute {}", + p.nodeString(), + outputAttr.toString() + ) + ); + } + } } protected AttributeSet references(P p) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java index f2603eedf8b84..586b18002562d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java @@ -73,6 +73,7 @@ import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.source; import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.typedParsing; import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.visitList; +import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputExpressions; import static org.elasticsearch.xpack.esql.plan.logical.Enrich.Mode; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToInt; @@ -234,8 +235,9 @@ public Map visitCommandOptions(EsqlBaseParser.CommandOptionsCont } @Override + @SuppressWarnings("unchecked") public LogicalPlan visitRowCommand(EsqlBaseParser.RowCommandContext ctx) { - return new Row(source(ctx), visitFields(ctx.fields())); + return new Row(source(ctx), (List) (List) mergeOutputExpressions(visitFields(ctx.fields()), List.of())); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java index 5e4b45d7127fe..f0b38d281474e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.plan.logical; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; @@ -28,6 +29,12 @@ public List renamings() { return renamings; } + @Override + public List output() { + // Rename is mapped to a Project during analysis; we do not compute the output here. 
+ throw new IllegalStateException("Should never reach here."); + } + @Override public boolean expressionsResolved() { for (var alias : renamings) { From 167cf33e56ae57fb6f6cbaa0a437ef506eb7a679 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Wed, 17 Jul 2024 11:40:18 +0100 Subject: [PATCH 13/65] [ML] Refactor the common Elser and Elasticsearch inference services (#110937) Use common base classes for the internal services, shared task settings and the model configs --- .../AdaptiveAllocationSettingsTests.java | 44 ++++ .../InferenceNamedWriteablesProvider.java | 35 ++- .../inference/services/ServiceUtils.java | 3 - .../BaseElasticsearchInternalService.java | 206 ++++++++++++++++ .../CustomElandEmbeddingModel.java | 19 +- .../CustomElandInternalServiceSettings.java | 95 +------- ...dInternalTextEmbeddingServiceSettings.java | 53 ++--- .../elasticsearch/CustomElandModel.java | 48 ++-- .../elasticsearch/CustomElandRerankModel.java | 24 +- .../ElasticsearchInternalModel.java | 59 +++++ .../ElasticsearchInternalService.java | 165 +++++++------ .../ElasticsearchInternalServiceSettings.java | 219 ++++++++++++++++-- .../elasticsearch/ElasticsearchModel.java | 24 -- ...lingualE5SmallInternalServiceSettings.java | 99 ++------ .../MultilingualE5SmallModel.java | 24 +- .../services/elser/ElserInternalModel.java | 37 ++- .../services/elser/ElserInternalService.java | 158 ++----------- .../elser/ElserInternalServiceSettings.java | 120 ++-------- .../settings/InternalServiceSettings.java | 161 ------------- ...rnalTextEmbeddingServiceSettingsTests.java | 4 +- ...ticsearchInternalServiceSettingsTests.java | 132 +++++++++++ .../ElasticsearchInternalServiceTests.java | 29 ++- ...alE5SmallInternalServiceSettingsTests.java | 46 +--- .../ElserInternalServiceSettingsTests.java | 117 ++-------- 24 files changed, 907 insertions(+), 1014 deletions(-) create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationSettingsTests.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalModel.java delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchModel.java delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/settings/InternalServiceSettings.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettingsTests.java diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationSettingsTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationSettingsTests.java new file mode 100644 index 0000000000000..c86648f10f08b --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationSettingsTests.java @@ -0,0 +1,44 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.core.ml.inference.assignment; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; + +import java.io.IOException; + +public class AdaptiveAllocationSettingsTests extends AbstractWireSerializingTestCase { + + public static AdaptiveAllocationsSettings testInstance() { + return new AdaptiveAllocationsSettings( + randomBoolean() ? null : randomBoolean(), + randomBoolean() ? null : randomIntBetween(1, 2), + randomBoolean() ? null : randomIntBetween(2, 4) + ); + } + + public static AdaptiveAllocationsSettings mutate(AdaptiveAllocationsSettings instance) { + boolean mutatedEnabled = Boolean.FALSE.equals(instance.getEnabled()); + return new AdaptiveAllocationsSettings(mutatedEnabled, instance.getMinNumberOfAllocations(), instance.getMaxNumberOfAllocations()); + } + + @Override + protected Writeable.Reader instanceReader() { + return AdaptiveAllocationsSettings::new; + } + + @Override + protected AdaptiveAllocationsSettings createTestInstance() { + return testInstance(); + } + + @Override + protected AdaptiveAllocationsSettings mutateInstance(AdaptiveAllocationsSettings instance) throws IOException { + return mutate(instance); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java index f8ce9df1fb194..476ab3355a0b8 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java @@ -98,23 +98,7 @@ public static List getNamedWriteables() { // Default secret settings namedWriteables.add(new NamedWriteableRegistry.Entry(SecretSettings.class, DefaultSecretSettings.NAME, DefaultSecretSettings::new)); - addInternalElserNamedWriteables(namedWriteables); - - // Internal TextEmbedding service config - namedWriteables.add( - new NamedWriteableRegistry.Entry( - ServiceSettings.class, - ElasticsearchInternalServiceSettings.NAME, - ElasticsearchInternalServiceSettings::new - ) - ); - namedWriteables.add( - new NamedWriteableRegistry.Entry( - ServiceSettings.class, - MultilingualE5SmallInternalServiceSettings.NAME, - MultilingualE5SmallInternalServiceSettings::new - ) - ); + addInternalNamedWriteables(namedWriteables); addHuggingFaceNamedWriteables(namedWriteables); addOpenAiNamedWriteables(namedWriteables); @@ -374,13 +358,28 @@ private static void addGoogleVertexAiNamedWriteables(List namedWriteables) { + private static void addInternalNamedWriteables(List namedWriteables) { namedWriteables.add( new NamedWriteableRegistry.Entry(ServiceSettings.class, ElserInternalServiceSettings.NAME, ElserInternalServiceSettings::new) ); namedWriteables.add( new NamedWriteableRegistry.Entry(TaskSettings.class, ElserMlNodeTaskSettings.NAME, ElserMlNodeTaskSettings::new) ); + namedWriteables.add( + new NamedWriteableRegistry.Entry( + ServiceSettings.class, + ElasticsearchInternalServiceSettings.NAME, + ElasticsearchInternalServiceSettings::new + ) + ); + namedWriteables.add( + new NamedWriteableRegistry.Entry( + ServiceSettings.class, + MultilingualE5SmallInternalServiceSettings.NAME, + MultilingualE5SmallInternalServiceSettings::new + ) + ); + } private static void addChunkedInferenceResultsNamedWriteables(List namedWriteables) { diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java index 3f3a61269f3e4..7e46dcfea7592 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java @@ -429,9 +429,6 @@ public static Integer extractOptionalPositiveInteger( if (optionalField != null && optionalField <= 0) { validationException.addValidationError(ServiceUtils.mustBeAPositiveIntegerErrorMessage(settingName, scope, optionalField)); - } - - if (validationException.validationErrors().size() > initialValidationErrorCount) { return null; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java new file mode 100644 index 0000000000000..574ca77d4587e --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java @@ -0,0 +1,206 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.elasticsearch; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.client.internal.OriginSettingClient; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceService; +import org.elasticsearch.inference.InferenceServiceExtension; +import org.elasticsearch.inference.InputType; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xpack.core.ClientHelper; +import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; +import org.elasticsearch.xpack.core.ml.action.InferModelAction; +import org.elasticsearch.xpack.core.ml.action.PutTrainedModelAction; +import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; +import org.elasticsearch.xpack.core.ml.action.StopTrainedModelDeploymentAction; +import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; +import org.elasticsearch.xpack.core.ml.inference.TrainedModelInput; +import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfigUpdate; +import org.elasticsearch.xpack.inference.services.elser.ElserInternalModel; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.List; +import java.util.Set; + +import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN; +import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; + +public abstract class BaseElasticsearchInternalService implements InferenceService { + + protected final OriginSettingClient client; + + private static final Logger logger = LogManager.getLogger(BaseElasticsearchInternalService.class); + + public 
BaseElasticsearchInternalService(InferenceServiceExtension.InferenceServiceFactoryContext context) { + this.client = new OriginSettingClient(context.client(), ClientHelper.INFERENCE_ORIGIN); + } + + /** + * The task types supported by the service + * @return Set of supported. + */ + protected abstract EnumSet supportedTaskTypes(); + + @Override + public void start(Model model, ActionListener listener) { + if (model instanceof ElasticsearchInternalModel == false) { + listener.onFailure(notElasticsearchModelException(model)); + return; + } + + if (supportedTaskTypes().contains(model.getTaskType()) == false) { + listener.onFailure( + new IllegalStateException(TaskType.unsupportedTaskTypeErrorMsg(model.getConfigurations().getTaskType(), name())) + ); + return; + } + + var esModel = (ElasticsearchInternalModel) model; + var startRequest = esModel.getStartTrainedModelDeploymentActionRequest(); + var responseListener = esModel.getCreateTrainedModelAssignmentActionListener(model, listener); + + client.execute(StartTrainedModelDeploymentAction.INSTANCE, startRequest, responseListener); + } + + @Override + public void stop(String inferenceEntityId, ActionListener listener) { + var request = new StopTrainedModelDeploymentAction.Request(inferenceEntityId); + request.setForce(true); + client.execute( + StopTrainedModelDeploymentAction.INSTANCE, + request, + listener.delegateFailureAndWrap((delegatedResponseListener, response) -> delegatedResponseListener.onResponse(Boolean.TRUE)) + ); + } + + protected static IllegalStateException notElasticsearchModelException(Model model) { + return new IllegalStateException( + "Error starting model, [" + model.getConfigurations().getInferenceEntityId() + "] is not an Elasticsearch service model" + ); + } + + @Override + public void putModel(Model model, ActionListener listener) { + if (model instanceof ElasticsearchInternalModel == false) { + listener.onFailure(notElasticsearchModelException(model)); + return; + } else if (model instanceof MultilingualE5SmallModel e5Model) { + putBuiltInModel(e5Model.getServiceSettings().modelId(), listener); + } else if (model instanceof ElserInternalModel elserModel) { + putBuiltInModel(elserModel.getServiceSettings().modelId(), listener); + } else if (model instanceof CustomElandModel) { + logger.info("Custom eland model detected, model must have been already loaded into the cluster with eland."); + listener.onResponse(Boolean.TRUE); + } else { + listener.onFailure( + new IllegalArgumentException( + "Can not download model automatically for [" + + model.getConfigurations().getInferenceEntityId() + + "] you may need to download it through the trained models API or with eland." 
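To make the new base class concrete, here is a minimal, hedged sketch of how a service plugs into BaseElasticsearchInternalService; the subclass name is hypothetical, only members visible in this patch are used, and ElasticsearchInternalService further down follows the same pattern. The class is left abstract because the remaining InferenceService methods (parseRequestConfig, infer, and so on) would still need implementations.

// Sketch only: a service built on the new base class. It inherits the shared
// start/stop/putModel/isModelDownloaded plumbing and only declares what it supports.
public abstract class MySketchInternalService extends BaseElasticsearchInternalService {

    public MySketchInternalService(InferenceServiceExtension.InferenceServiceFactoryContext context) {
        super(context); // wraps the client with the INFERENCE origin, as shown above
    }

    @Override
    protected EnumSet<TaskType> supportedTaskTypes() {
        // task types accepted by start(); anything else fails with an unsupported-task-type error
        return EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.RERANK);
    }

    @Override
    public String name() {
        return "my-sketch-service"; // hypothetical service name
    }
}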
+ ) + ); + return; + } + } + + private void putBuiltInModel(String modelId, ActionListener listener) { + var input = new TrainedModelInput(List.of("text_field")); // by convention text_field is used + var config = TrainedModelConfig.builder().setInput(input).setModelId(modelId).validate(true).build(); + PutTrainedModelAction.Request putRequest = new PutTrainedModelAction.Request(config, false, true); + executeAsyncWithOrigin( + client, + INFERENCE_ORIGIN, + PutTrainedModelAction.INSTANCE, + putRequest, + ActionListener.wrap(response -> listener.onResponse(Boolean.TRUE), e -> { + if (e instanceof ElasticsearchStatusException esException + && esException.getMessage().contains(PutTrainedModelAction.MODEL_ALREADY_EXISTS_ERROR_MESSAGE_FRAGMENT)) { + listener.onResponse(Boolean.TRUE); + } else { + listener.onFailure(e); + } + }) + ); + } + + @Override + public void isModelDownloaded(Model model, ActionListener listener) { + ActionListener getModelsResponseListener = listener.delegateFailure((delegate, response) -> { + if (response.getResources().count() < 1) { + delegate.onResponse(Boolean.FALSE); + } else { + delegate.onResponse(Boolean.TRUE); + } + }); + + if (model instanceof ElasticsearchInternalModel == false) { + listener.onFailure(notElasticsearchModelException(model)); + } else if (model.getServiceSettings() instanceof ElasticsearchInternalServiceSettings internalServiceSettings) { + String modelId = internalServiceSettings.modelId(); + GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId); + executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); + } else { + listener.onFailure( + new IllegalArgumentException( + "Unable to determine supported model for [" + + model.getConfigurations().getInferenceEntityId() + + "] please verify the request and submit a bug report if necessary." + ) + ); + } + } + + @Override + public boolean isInClusterService() { + return true; + } + + @Override + public void close() throws IOException {} + + public static String selectDefaultModelVariantBasedOnClusterArchitecture( + Set modelArchitectures, + String linuxX86OptimisedModel, + String platformAgnosticModel + ) { + // choose a default model version based on the cluster architecture + boolean homogenous = modelArchitectures.size() == 1; + if (homogenous && modelArchitectures.iterator().next().equals("linux-x86_64")) { + // Use the hardware optimized model + return linuxX86OptimisedModel; + } else { + // default to the platform-agnostic model + return platformAgnosticModel; + } + } + + public static InferModelAction.Request buildInferenceRequest( + String id, + InferenceConfigUpdate update, + List inputs, + InputType inputType, + TimeValue timeout, + boolean chunk + ) { + var request = InferModelAction.Request.forTextInput(id, update, inputs, true, timeout); + request.setPrefixType( + InputType.SEARCH == inputType ? 
TrainedModelPrefixStrings.PrefixType.SEARCH : TrainedModelPrefixStrings.PrefixType.INGEST + ); + request.setHighPriority(InputType.SEARCH == inputType); + request.setChunked(chunk); + return request; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandEmbeddingModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandEmbeddingModel.java index bb4e0c2c513ac..59203d00e589a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandEmbeddingModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandEmbeddingModel.java @@ -7,34 +7,17 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.TaskType; -import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; - -import java.util.Map; public class CustomElandEmbeddingModel extends CustomElandModel { - public CustomElandEmbeddingModel( - String inferenceEntityId, - TaskType taskType, - String service, - Map serviceSettings, - ConfigurationParseContext context - ) { - this(inferenceEntityId, taskType, service, CustomElandInternalTextEmbeddingServiceSettings.fromMap(serviceSettings, context)); - } - public CustomElandEmbeddingModel( String inferenceEntityId, TaskType taskType, String service, CustomElandInternalTextEmbeddingServiceSettings serviceSettings ) { - super( - new ModelConfigurations(inferenceEntityId, taskType, service, serviceSettings), - serviceSettings.getElasticsearchInternalServiceSettings() - ); + super(inferenceEntityId, taskType, service, serviceSettings); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalServiceSettings.java index 0bb45a6c77a8c..3cc7e0c6c2b53 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalServiceSettings.java @@ -7,27 +7,19 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.inference.ModelConfigurations; -import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; -import org.elasticsearch.xpack.inference.services.ServiceUtils; import java.io.IOException; -import java.util.Map; - -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredPositiveInteger; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString; public class CustomElandInternalServiceSettings extends ElasticsearchInternalServiceSettings { public static final String NAME = 
"custom_eland_model_internal_service_settings"; + public CustomElandInternalServiceSettings(ElasticsearchInternalServiceSettings other) { + super(other); + } + public CustomElandInternalServiceSettings( Integer numAllocations, int numThreads, @@ -37,89 +29,12 @@ public CustomElandInternalServiceSettings( super(numAllocations, numThreads, modelId, adaptiveAllocationsSettings); } - /** - * Parse the CustomElandServiceSettings from map and validate the setting values. - * - * This method does not verify the model variant - * - * If required setting are missing or the values are invalid an - * {@link ValidationException} is thrown. - * - * @param map Source map containing the config - * @return The {@code CustomElandServiceSettings} builder - */ - public static CustomElandInternalServiceSettings fromMap(Map map) { - ValidationException validationException = new ValidationException(); - - Integer numAllocations = extractOptionalPositiveInteger( - map, - NUM_ALLOCATIONS, - ModelConfigurations.SERVICE_SETTINGS, - validationException - ); - Integer numThreads = extractRequiredPositiveInteger(map, NUM_THREADS, ModelConfigurations.SERVICE_SETTINGS, validationException); - AdaptiveAllocationsSettings adaptiveAllocationsSettings = ServiceUtils.removeAsAdaptiveAllocationsSettings( - map, - ADAPTIVE_ALLOCATIONS, - validationException - ); - String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); - - if (validationException.validationErrors().isEmpty() == false) { - throw validationException; - } - - var builder = new Builder() { - @Override - public CustomElandInternalServiceSettings build() { - return new CustomElandInternalServiceSettings( - getNumAllocations(), - getNumThreads(), - getModelId(), - getAdaptiveAllocationsSettings() - ); - } - }; - builder.setNumAllocations(numAllocations); - builder.setNumThreads(numThreads); - builder.setModelId(modelId); - builder.setAdaptiveAllocationsSettings(adaptiveAllocationsSettings); - return builder.build(); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - return super.toXContent(builder, params); - } - public CustomElandInternalServiceSettings(StreamInput in) throws IOException { - super( - in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS) ? in.readOptionalVInt() : in.readVInt(), - in.readVInt(), - in.readString(), - in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS) - ? 
in.readOptionalWriteable(AdaptiveAllocationsSettings::new) - : null - ); - } - - @Override - public boolean isFragment() { - return super.isFragment(); + super(in); } @Override public String getWriteableName() { return CustomElandInternalServiceSettings.NAME; } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.V_8_13_0; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalTextEmbeddingServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalTextEmbeddingServiceSettings.java index b792df540d74c..381c97969e79f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalTextEmbeddingServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalTextEmbeddingServiceSettings.java @@ -7,14 +7,12 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.ModelConfigurations; -import org.elasticsearch.inference.ServiceSettings; import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; @@ -33,7 +31,7 @@ import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractSimilarity; -public class CustomElandInternalTextEmbeddingServiceSettings implements ServiceSettings { +public class CustomElandInternalTextEmbeddingServiceSettings extends ElasticsearchInternalServiceSettings { public static final String NAME = "custom_eland_model_internal_text_embedding_service_settings"; @@ -51,12 +49,12 @@ public class CustomElandInternalTextEmbeddingServiceSettings implements ServiceS */ public static CustomElandInternalTextEmbeddingServiceSettings fromMap(Map map, ConfigurationParseContext context) { return switch (context) { - case REQUEST -> fromRequestMap(map); - case PERSISTENT -> fromPersistedMap(map); + case REQUEST -> forRequest(map); + case PERSISTENT -> forPersisted(map); }; } - private static CustomElandInternalTextEmbeddingServiceSettings fromRequestMap(Map map) { + private static CustomElandInternalTextEmbeddingServiceSettings forRequest(Map map) { ValidationException validationException = new ValidationException(); var commonFields = commonFieldsFromMap(map, validationException); @@ -67,7 +65,7 @@ private static CustomElandInternalTextEmbeddingServiceSettings fromRequestMap(Ma return new CustomElandInternalTextEmbeddingServiceSettings(commonFields); } - private static CustomElandInternalTextEmbeddingServiceSettings fromPersistedMap(Map map) { + private static CustomElandInternalTextEmbeddingServiceSettings forPersisted(Map map) { var commonFields = commonFieldsFromMap(map); Integer dims = extractOptionalPositiveInteger(map, DIMENSIONS, ModelConfigurations.SERVICE_SETTINGS, new 
ValidationException()); @@ -97,13 +95,12 @@ private static CommonFields commonFieldsFromMap(Map map, Validat ); return new CommonFields( - internalSettings, + internalSettings.build(), Objects.requireNonNullElse(similarity, SimilarityMeasure.COSINE), Objects.requireNonNullElse(elementType, DenseVectorFieldMapper.ElementType.FLOAT) ); } - private final ElasticsearchInternalServiceSettings internalServiceSettings; private final Integer dimensions; private final SimilarityMeasure similarityMeasure; private final DenseVectorFieldMapper.ElementType elementType; @@ -134,19 +131,14 @@ public CustomElandInternalTextEmbeddingServiceSettings( SimilarityMeasure similarityMeasure, DenseVectorFieldMapper.ElementType elementType ) { - internalServiceSettings = new ElasticsearchInternalServiceSettings( - numAllocations, - numThreads, - modelId, - adaptiveAllocationsSettings - ); + super(numAllocations, numThreads, modelId, adaptiveAllocationsSettings); this.dimensions = dimensions; this.similarityMeasure = Objects.requireNonNull(similarityMeasure); this.elementType = Objects.requireNonNull(elementType); } public CustomElandInternalTextEmbeddingServiceSettings(StreamInput in) throws IOException { - internalServiceSettings = new ElasticsearchInternalServiceSettings(in); + super(in); if (in.getTransportVersion().onOrAfter(TransportVersions.ML_INFERENCE_ELAND_SETTINGS_ADDED)) { dimensions = in.readOptionalVInt(); similarityMeasure = in.readEnum(SimilarityMeasure.class); @@ -163,7 +155,12 @@ private CustomElandInternalTextEmbeddingServiceSettings(CommonFields commonField } private CustomElandInternalTextEmbeddingServiceSettings(CommonFields commonFields, Integer dimensions) { - internalServiceSettings = commonFields.internalServiceSettings; + super( + commonFields.internalServiceSettings.getNumAllocations(), + commonFields.internalServiceSettings.getNumThreads(), + commonFields.internalServiceSettings.modelId(), + commonFields.internalServiceSettings.getAdaptiveAllocationsSettings() + ); this.dimensions = dimensions; similarityMeasure = commonFields.similarityMeasure; elementType = commonFields.elementType; @@ -173,7 +170,7 @@ private CustomElandInternalTextEmbeddingServiceSettings(CommonFields commonField public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - internalServiceSettings.addXContentFragment(builder, params); + addInternalSettingsToXContent(builder, params); if (dimensions != null) { builder.field(DIMENSIONS, dimensions); @@ -196,14 +193,9 @@ public String getWriteableName() { return CustomElandInternalTextEmbeddingServiceSettings.NAME; } - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.V_8_13_0; - } - @Override public void writeTo(StreamOutput out) throws IOException { - internalServiceSettings.writeTo(out); + super.writeTo(out); if (out.getTransportVersion().onOrAfter(TransportVersions.ML_INFERENCE_ELAND_SETTINGS_ADDED)) { out.writeOptionalVInt(dimensions); @@ -212,10 +204,6 @@ public void writeTo(StreamOutput out) throws IOException { } } - public ElasticsearchInternalServiceSettings getElasticsearchInternalServiceSettings() { - return internalServiceSettings; - } - @Override public DenseVectorFieldMapper.ElementType elementType() { return elementType; @@ -231,11 +219,6 @@ public Integer dimensions() { return dimensions; } - @Override - public String modelId() { - return getElasticsearchInternalServiceSettings().modelId(); - } - @Override public ToXContentObject 
getFilteredXContentObject() { return this; @@ -246,7 +229,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; CustomElandInternalTextEmbeddingServiceSettings that = (CustomElandInternalTextEmbeddingServiceSettings) o; - return Objects.equals(internalServiceSettings, that.internalServiceSettings) + return super.equals(that) && Objects.equals(dimensions, that.dimensions) && Objects.equals(similarityMeasure, that.similarityMeasure) && Objects.equals(elementType, that.elementType); @@ -254,7 +237,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hash(internalServiceSettings, dimensions, similarityMeasure, elementType); + return Objects.hash(super.hashCode(), dimensions, similarityMeasure, elementType); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandModel.java index e3ec9b2e19312..83f22f08b620d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandModel.java @@ -10,37 +10,30 @@ import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.inference.Model; -import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.TaskSettings; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; -import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; -import org.elasticsearch.xpack.inference.services.settings.InternalServiceSettings; -import java.util.Objects; +public class CustomElandModel extends ElasticsearchInternalModel { -import static org.elasticsearch.xpack.core.ml.inference.assignment.AllocationStatus.State.STARTED; - -public class CustomElandModel extends Model implements ElasticsearchModel { - private final InternalServiceSettings internalServiceSettings; - - public CustomElandModel(ModelConfigurations configurations, InternalServiceSettings internalServiceSettings) { - super(configurations); - this.internalServiceSettings = Objects.requireNonNull(internalServiceSettings); - } - - public String getModelId() { - return internalServiceSettings.modelId(); + public CustomElandModel( + String inferenceEntityId, + TaskType taskType, + String service, + ElasticsearchInternalServiceSettings internalServiceSettings + ) { + super(inferenceEntityId, taskType, service, internalServiceSettings); } - @Override - public StartTrainedModelDeploymentAction.Request getStartTrainedModelDeploymentActionRequest() { - var startRequest = new StartTrainedModelDeploymentAction.Request(internalServiceSettings.modelId(), this.getInferenceEntityId()); - startRequest.setNumberOfAllocations(internalServiceSettings.getNumAllocations()); - startRequest.setThreadsPerAllocation(internalServiceSettings.getNumThreads()); - startRequest.setAdaptiveAllocationsSettings(internalServiceSettings.getAdaptiveAllocationsSettings()); - startRequest.setWaitForState(STARTED); - - return startRequest; + public CustomElandModel( + String inferenceEntityId, + TaskType taskType, + String service, + 
ElasticsearchInternalServiceSettings internalServiceSettings, + TaskSettings taskSettings + ) { + super(inferenceEntityId, taskType, service, internalServiceSettings, taskSettings); } @Override @@ -60,10 +53,9 @@ public void onFailure(Exception e) { if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { listener.onFailure( new ResourceNotFoundException( - "Could not start the TextEmbeddingService service as the " - + "custom eland model [{0}] for this platform cannot be found." + "Could not start the inference as the custom eland model [{0}] for this platform cannot be found." + " Custom models need to be loaded into the cluster with eland before they can be started.", - getModelId() + internalServiceSettings.modelId() ) ); return; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandRerankModel.java index d880450739319..63f4a3dbf8472 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandRerankModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandRerankModel.java @@ -7,40 +7,18 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.TaskType; -import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; - -import java.util.Map; public class CustomElandRerankModel extends CustomElandModel { public CustomElandRerankModel( - String inferenceEntityId, - TaskType taskType, - String service, - Map serviceSettings, - Map taskSettings, - ConfigurationParseContext context - ) { - this( - inferenceEntityId, - taskType, - service, - CustomElandInternalServiceSettings.fromMap(serviceSettings), - CustomElandRerankTaskSettings.defaultsFromMap(taskSettings) - ); - } - - // default for testing - CustomElandRerankModel( String inferenceEntityId, TaskType taskType, String service, CustomElandInternalServiceSettings serviceSettings, CustomElandRerankTaskSettings taskSettings ) { - super(new ModelConfigurations(inferenceEntityId, taskType, service, serviceSettings, taskSettings), serviceSettings); + super(inferenceEntityId, taskType, service, serviceSettings, taskSettings); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalModel.java new file mode 100644 index 0000000000000..405c687839629 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalModel.java @@ -0,0 +1,59 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
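For orientation, a hedged sketch of constructing one of the refactored custom eland models directly; the constructor shapes, enum values, and accessor names mirror this patch, while the model id, inference id, and numeric values are illustrative only.

// Sketch: building a custom eland text-embedding model with the consolidated settings class.
var embeddingSettings = new CustomElandInternalTextEmbeddingServiceSettings(
    1,                                   // numAllocations
    1,                                   // numThreads
    "my-eland-embedding-model",          // hypothetical model id, already loaded via eland
    null,                                // no adaptive allocations settings
    384,                                 // dimensions (example value)
    SimilarityMeasure.COSINE,
    DenseVectorFieldMapper.ElementType.FLOAT
);
var embeddingModel = new CustomElandEmbeddingModel(
    "my-inference-id",                   // hypothetical inference entity id
    TaskType.TEXT_EMBEDDING,
    "elasticsearch",                     // service name
    embeddingSettings
);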
+ */ + +package org.elasticsearch.xpack.inference.services.elasticsearch; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.TaskSettings; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; +import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; + +import static org.elasticsearch.xpack.core.ml.inference.assignment.AllocationStatus.State.STARTED; + +public abstract class ElasticsearchInternalModel extends Model { + + protected final ElasticsearchInternalServiceSettings internalServiceSettings; + + public ElasticsearchInternalModel( + String inferenceEntityId, + TaskType taskType, + String service, + ElasticsearchInternalServiceSettings internalServiceSettings + ) { + super(new ModelConfigurations(inferenceEntityId, taskType, service, internalServiceSettings)); + this.internalServiceSettings = internalServiceSettings; + } + + public ElasticsearchInternalModel( + String inferenceEntityId, + TaskType taskType, + String service, + ElasticsearchInternalServiceSettings internalServiceSettings, + TaskSettings taskSettings + ) { + super(new ModelConfigurations(inferenceEntityId, taskType, service, internalServiceSettings, taskSettings)); + this.internalServiceSettings = internalServiceSettings; + } + + public StartTrainedModelDeploymentAction.Request getStartTrainedModelDeploymentActionRequest() { + var startRequest = new StartTrainedModelDeploymentAction.Request(internalServiceSettings.modelId(), this.getInferenceEntityId()); + startRequest.setNumberOfAllocations(internalServiceSettings.getNumAllocations()); + startRequest.setThreadsPerAllocation(internalServiceSettings.getNumThreads()); + startRequest.setAdaptiveAllocationsSettings(internalServiceSettings.getAdaptiveAllocationsSettings()); + startRequest.setWaitForState(STARTED); + + return startRequest; + } + + public abstract ActionListener getCreateTrainedModelAssignmentActionListener( + Model model, + ActionListener listener + ); +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index c48bf025933ac..ef72b022e929e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -13,13 +13,12 @@ import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.client.internal.OriginSettingClient; +import org.elasticsearch.common.ValidationException; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.InferenceResults; -import org.elasticsearch.inference.InferenceService; import org.elasticsearch.inference.InferenceServiceExtension; import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.inference.InputType; @@ -27,7 +26,6 @@ import 
org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xpack.core.ClientHelper; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; @@ -39,20 +37,17 @@ import org.elasticsearch.xpack.core.ml.action.StopTrainedModelDeploymentAction; import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; import org.elasticsearch.xpack.core.ml.inference.TrainedModelInput; -import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResults; -import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextSimilarityConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.ServiceUtils; -import org.elasticsearch.xpack.inference.services.settings.InternalServiceSettings; -import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.EnumSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -64,9 +59,8 @@ import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; -import static org.elasticsearch.xpack.inference.services.settings.InternalServiceSettings.MODEL_ID; -public class ElasticsearchInternalService implements InferenceService { +public class ElasticsearchInternalService extends BaseElasticsearchInternalService { public static final String NAME = "elasticsearch"; @@ -77,12 +71,15 @@ public class ElasticsearchInternalService implements InferenceService { MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 ); - private final OriginSettingClient client; - private static final Logger logger = LogManager.getLogger(ElasticsearchInternalService.class); public ElasticsearchInternalService(InferenceServiceExtension.InferenceServiceFactoryContext context) { - this.client = new OriginSettingClient(context.client(), ClientHelper.INFERENCE_ORIGIN); + super(context); + } + + @Override + protected EnumSet supportedTaskTypes() { + return EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING); } @Override @@ -96,14 +93,16 @@ public void parseRequestConfig( try { Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); Map taskSettingsMap = removeFromMap(config, ModelConfigurations.TASK_SETTINGS); - String modelId = (String) serviceSettingsMap.get(MODEL_ID); + + throwIfNotEmptyMap(config, name()); + + String modelId = (String) serviceSettingsMap.get(ElasticsearchInternalServiceSettings.MODEL_ID); if (modelId == null) { - throw new IllegalArgumentException("Error parsing request config, model id is missing"); + throw new ValidationException().addValidationError("Error parsing 
request config, model id is missing"); } if (MULTILINGUAL_E5_SMALL_VALID_IDS.contains(modelId)) { e5Case(inferenceEntityId, taskType, config, platformArchitectures, serviceSettingsMap, modelListener); } else { - throwIfNotEmptyMap(config, name()); customElandCase(inferenceEntityId, taskType, serviceSettingsMap, taskSettingsMap, modelListener); } } catch (Exception e) { @@ -118,7 +117,7 @@ private void customElandCase( Map taskSettingsMap, ActionListener modelListener ) { - String modelId = (String) serviceSettingsMap.get(MODEL_ID); + String modelId = (String) serviceSettingsMap.get(ElasticsearchInternalServiceSettings.MODEL_ID); var request = new GetTrainedModelsAction.Request(modelId); var getModelsListener = modelListener.delegateFailureAndWrap((delegate, response) -> { @@ -154,13 +153,37 @@ private static CustomElandModel createCustomElandModel( Map taskSettings, ConfigurationParseContext context ) { + return switch (taskType) { - case TEXT_EMBEDDING -> new CustomElandEmbeddingModel(inferenceEntityId, taskType, NAME, serviceSettings, context); - case RERANK -> new CustomElandRerankModel(inferenceEntityId, taskType, NAME, serviceSettings, taskSettings, context); + case TEXT_EMBEDDING -> new CustomElandEmbeddingModel( + inferenceEntityId, + taskType, + NAME, + CustomElandInternalTextEmbeddingServiceSettings.fromMap(serviceSettings, context) + ); + case RERANK -> new CustomElandRerankModel( + inferenceEntityId, + taskType, + NAME, + elandServiceSettings(serviceSettings, context), + CustomElandRerankTaskSettings.fromMap(taskSettings) + ); default -> throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); }; } + private static CustomElandInternalServiceSettings elandServiceSettings( + Map settingsMap, + ConfigurationParseContext context + ) { + return switch (context) { + case REQUEST -> new CustomElandInternalServiceSettings( + ElasticsearchInternalServiceSettings.fromRequestMap(settingsMap).build() + ); + case PERSISTENT -> new CustomElandInternalServiceSettings(ElasticsearchInternalServiceSettings.fromPersistedMap(settingsMap)); + }; + } + private void e5Case( String inferenceEntityId, TaskType taskType, @@ -169,16 +192,22 @@ private void e5Case( Map serviceSettingsMap, ActionListener modelListener ) { - var e5ServiceSettings = MultilingualE5SmallInternalServiceSettings.fromMap(serviceSettingsMap); - - if (e5ServiceSettings.getModelId() == null) { - e5ServiceSettings.setModelId(selectDefaultModelVariantBasedOnClusterArchitecture(platformArchitectures)); + var esServiceSettingsBuilder = ElasticsearchInternalServiceSettings.fromRequestMap(serviceSettingsMap); + + if (esServiceSettingsBuilder.getModelId() == null) { + esServiceSettingsBuilder.setModelId( + selectDefaultModelVariantBasedOnClusterArchitecture( + platformArchitectures, + MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86, + MULTILINGUAL_E5_SMALL_MODEL_ID + ) + ); } - if (modelVariantDoesNotMatchArchitecturesAndIsNotPlatformAgnostic(platformArchitectures, e5ServiceSettings)) { + if (modelVariantDoesNotMatchArchitecturesAndIsNotPlatformAgnostic(platformArchitectures, esServiceSettingsBuilder.getModelId())) { throw new IllegalArgumentException( "Error parsing request config, model id does not match any models available on this platform. 
Was [" - + e5ServiceSettings.getModelId() + + esServiceSettingsBuilder.getModelId() + "]" ); } @@ -191,17 +220,22 @@ private void e5Case( inferenceEntityId, taskType, NAME, - (MultilingualE5SmallInternalServiceSettings) e5ServiceSettings.build() + new MultilingualE5SmallInternalServiceSettings(esServiceSettingsBuilder.build()) ) ); } private static boolean modelVariantDoesNotMatchArchitecturesAndIsNotPlatformAgnostic( Set platformArchitectures, - InternalServiceSettings.Builder e5ServiceSettings + String modelId ) { - return e5ServiceSettings.getModelId().equals(selectDefaultModelVariantBasedOnClusterArchitecture(platformArchitectures)) == false - && e5ServiceSettings.getModelId().equals(MULTILINGUAL_E5_SMALL_MODEL_ID) == false; + return modelId.equals( + selectDefaultModelVariantBasedOnClusterArchitecture( + platformArchitectures, + MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86, + MULTILINGUAL_E5_SMALL_MODEL_ID + ) + ) && modelId.equals(MULTILINGUAL_E5_SMALL_MODEL_ID) == false; } @Override @@ -219,7 +253,7 @@ public Model parsePersistedConfig(String inferenceEntityId, TaskType taskType, M Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); Map taskSettingsMap = removeFromMap(config, ModelConfigurations.TASK_SETTINGS); - String modelId = (String) serviceSettingsMap.get(MODEL_ID); + String modelId = (String) serviceSettingsMap.get(ElasticsearchInternalServiceSettings.MODEL_ID); if (modelId == null) { throw new IllegalArgumentException("Error parsing request config, model id is missing"); } @@ -229,7 +263,7 @@ public Model parsePersistedConfig(String inferenceEntityId, TaskType taskType, M inferenceEntityId, taskType, NAME, - (MultilingualE5SmallInternalServiceSettings) MultilingualE5SmallInternalServiceSettings.fromMap(serviceSettingsMap).build() + new MultilingualE5SmallInternalServiceSettings(ElasticsearchInternalServiceSettings.fromPersistedMap(serviceSettingsMap)) ); } else { return createCustomElandModel( @@ -250,7 +284,7 @@ public void checkModelConfig(Model model, ActionListener listener) { // model id. To get around this we'll have the getEmbeddingSize() method use the model id instead of inference id. So we need // to create a temporary model that overrides the inference id with the model id. 
var temporaryModelWithModelId = new CustomElandEmbeddingModel( - elandModel.getModelId(), + elandModel.getServiceSettings().modelId(), elandModel.getTaskType(), elandModel.getConfigurations().getService(), elandModel.getServiceSettings() @@ -268,10 +302,10 @@ public void checkModelConfig(Model model, ActionListener listener) { private static CustomElandEmbeddingModel updateModelWithEmbeddingDetails(CustomElandEmbeddingModel model, int embeddingSize) { CustomElandInternalTextEmbeddingServiceSettings serviceSettings = new CustomElandInternalTextEmbeddingServiceSettings( - model.getServiceSettings().getElasticsearchInternalServiceSettings().getNumAllocations(), - model.getServiceSettings().getElasticsearchInternalServiceSettings().getNumThreads(), - model.getServiceSettings().getElasticsearchInternalServiceSettings().modelId(), - model.getServiceSettings().getElasticsearchInternalServiceSettings().getAdaptiveAllocationsSettings(), + model.getServiceSettings().getNumAllocations(), + model.getServiceSettings().getNumThreads(), + model.getServiceSettings().modelId(), + model.getServiceSettings().getAdaptiveAllocationsSettings(), embeddingSize, model.getServiceSettings().similarity(), model.getServiceSettings().elementType() @@ -439,8 +473,8 @@ private static ChunkedInferenceServiceResults translateToChunkedResult(Inference @Override public void start(Model model, ActionListener listener) { - if (model instanceof ElasticsearchModel == false) { - listener.onFailure(notTextEmbeddingModelException(model)); + if (model instanceof ElasticsearchInternalModel == false) { + listener.onFailure(notElasticsearchModelException(model)); return; } @@ -451,8 +485,8 @@ public void start(Model model, ActionListener listener) { return; } - var startRequest = ((ElasticsearchModel) model).getStartTrainedModelDeploymentActionRequest(); - var responseListener = ((ElasticsearchModel) model).getCreateTrainedModelAssignmentActionListener(model, listener); + var startRequest = ((ElasticsearchInternalModel) model).getStartTrainedModelDeploymentActionRequest(); + var responseListener = ((ElasticsearchInternalModel) model).getCreateTrainedModelAssignmentActionListener(model, listener); client.execute(StartTrainedModelDeploymentAction.INSTANCE, startRequest, responseListener); } @@ -470,8 +504,8 @@ public void stop(String inferenceEntityId, ActionListener listener) { @Override public void putModel(Model model, ActionListener listener) { - if (model instanceof ElasticsearchModel == false) { - listener.onFailure(notTextEmbeddingModelException(model)); + if (model instanceof ElasticsearchInternalModel == false) { + listener.onFailure(notElasticsearchModelException(model)); return; } else if (model instanceof MultilingualE5SmallModel e5Model) { String modelId = e5Model.getServiceSettings().modelId(); @@ -517,12 +551,12 @@ public void isModelDownloaded(Model model, ActionListener listener) { } }); - if (model instanceof ElasticsearchModel == false) { - listener.onFailure(notTextEmbeddingModelException(model)); - } else if (model.getServiceSettings() instanceof InternalServiceSettings internalServiceSettings) { + if (model.getServiceSettings() instanceof ElasticsearchInternalServiceSettings internalServiceSettings) { String modelId = internalServiceSettings.modelId(); GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId); executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); + } else if (model instanceof ElasticsearchInternalModel 
== false) { + listener.onFailure(notElasticsearchModelException(model)); } else { listener.onFailure( new IllegalArgumentException( @@ -534,42 +568,16 @@ public void isModelDownloaded(Model model, ActionListener listener) { } } - private static IllegalStateException notTextEmbeddingModelException(Model model) { - return new IllegalStateException( - "Error starting model, [" + model.getConfigurations().getInferenceEntityId() + "] is not a text embedding model" - ); - } - - @Override - public boolean isInClusterService() { - return true; - } - @Override public TransportVersion getMinimalSupportedVersion() { return TransportVersions.ML_INFERENCE_L2_NORM_SIMILARITY_ADDED; } - @Override - public void close() throws IOException {} - @Override public String name() { return NAME; } - private static String selectDefaultModelVariantBasedOnClusterArchitecture(Set modelArchitectures) { - // choose a default model version based on the cluster architecture - boolean homogenous = modelArchitectures.size() == 1; - if (homogenous && modelArchitectures.iterator().next().equals("linux-x86_64")) { - // Use the hardware optimized model - return MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86; - } else { - // default to the platform-agnostic model - return MULTILINGUAL_E5_SMALL_MODEL_ID; - } - } - private RankedDocsResults textSimilarityResultsToRankedDocs( List results, Function inputSupplier @@ -601,21 +609,4 @@ private RankedDocsResults textSimilarityResultsToRankedDocs( Collections.sort(rankings); return new RankedDocsResults(rankings); } - - public static InferModelAction.Request buildInferenceRequest( - String id, - InferenceConfigUpdate update, - List inputs, - InputType inputType, - TimeValue timeout, - boolean chunk - ) { - var request = InferModelAction.Request.forTextInput(id, update, inputs, true, timeout); - request.setPrefixType( - InputType.SEARCH == inputType ? 
TrainedModelPrefixStrings.PrefixType.SEARCH : TrainedModelPrefixStrings.PrefixType.INGEST - ); - request.setHighPriority(InputType.SEARCH == inputType); - request.setChunked(chunk); - return request; - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettings.java index f1255519590cb..1acf19c5373b7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettings.java @@ -11,25 +11,65 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; import org.elasticsearch.xpack.inference.services.ServiceUtils; -import org.elasticsearch.xpack.inference.services.settings.InternalServiceSettings; import java.io.IOException; +import java.util.List; import java.util.Map; import java.util.Objects; import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalString; import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredPositiveInteger; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString; -public class ElasticsearchInternalServiceSettings extends InternalServiceSettings { +public class ElasticsearchInternalServiceSettings implements ServiceSettings { public static final String NAME = "text_embedding_internal_service_settings"; private static final int FAILED_INT_PARSE_VALUE = -1; - public static ElasticsearchInternalServiceSettings fromMap(Map map, ValidationException validationException) { + public static final String NUM_ALLOCATIONS = "num_allocations"; + public static final String NUM_THREADS = "num_threads"; + public static final String MODEL_ID = "model_id"; + public static final String ADAPTIVE_ALLOCATIONS = "adaptive_allocations"; + + private final Integer numAllocations; + private final int numThreads; + private final String modelId; + private final AdaptiveAllocationsSettings adaptiveAllocationsSettings; + + public static ElasticsearchInternalServiceSettings fromPersistedMap(Map map) { + return fromRequestMap(map).build(); + } + + /** + * Parse the ElasticsearchInternalServiceSettings from the map. + * Validates that present threading settings are of the right type and value, + * The model id is optional, it is for the inference service to check and + * potentially set a default value for the model id. + * Throws an {@code ValidationException} on validation failures + * + * @param map The request map. + * @return A builder to allow the settings to be modified. 
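A hedged usage sketch of the parser described by this javadoc (the method itself follows): the map keys and builder methods are taken from this patch, while the literal values and the chosen default model id are illustrative, mirroring what e5Case() does above.

// Sketch: parse request settings, let the service fill in a default model id, then build.
Map<String, Object> serviceSettingsMap = new HashMap<>();
serviceSettingsMap.put("num_allocations", 1);  // NUM_ALLOCATIONS
serviceSettingsMap.put("num_threads", 1);      // NUM_THREADS; model_id intentionally omitted

ElasticsearchInternalServiceSettings.Builder builder =
    ElasticsearchInternalServiceSettings.fromRequestMap(serviceSettingsMap);
if (builder.getModelId() == null) {
    // the service decides the default, e.g. per cluster architecture as in e5Case()
    builder.setModelId(ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID);
}
ElasticsearchInternalServiceSettings settings = builder.build();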
+ */ + public static ElasticsearchInternalServiceSettings.Builder fromRequestMap(Map map) { + var validationException = new ValidationException(); + var builder = fromMap(map, validationException); + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + return builder; + } + + protected static ElasticsearchInternalServiceSettings.Builder fromMap( + Map map, + ValidationException validationException + ) { Integer numAllocations = extractOptionalPositiveInteger( map, NUM_ALLOCATIONS, @@ -42,36 +82,112 @@ public static ElasticsearchInternalServiceSettings fromMap(Map m ADAPTIVE_ALLOCATIONS, validationException ); - String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); + + // model id is optional as the ELSER and E5 service will default it + String modelId = extractOptionalString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); + + if (numAllocations == null && adaptiveAllocationsSettings == null) { + validationException.addValidationError( + ServiceUtils.missingOneOfSettingsErrorMsg( + List.of(NUM_ALLOCATIONS, ADAPTIVE_ALLOCATIONS), + ModelConfigurations.SERVICE_SETTINGS + ) + ); + } // if an error occurred while parsing, we'll set these to an invalid value, so we don't accidentally get a // null pointer when doing unboxing - return new ElasticsearchInternalServiceSettings( - numAllocations, - Objects.requireNonNullElse(numThreads, FAILED_INT_PARSE_VALUE), - modelId, - adaptiveAllocationsSettings - ); + return new ElasticsearchInternalServiceSettings.Builder().setNumAllocations(numAllocations) + .setNumThreads(Objects.requireNonNullElse(numThreads, FAILED_INT_PARSE_VALUE)) + .setModelId(modelId) + .setAdaptiveAllocationsSettings(adaptiveAllocationsSettings); } public ElasticsearchInternalServiceSettings( Integer numAllocations, int numThreads, - String modelVariant, + String modelId, AdaptiveAllocationsSettings adaptiveAllocationsSettings ) { - super(numAllocations, numThreads, modelVariant, adaptiveAllocationsSettings); + this.numAllocations = numAllocations; + this.numThreads = numThreads; + this.modelId = Objects.requireNonNull(modelId); + this.adaptiveAllocationsSettings = adaptiveAllocationsSettings; + } + + protected ElasticsearchInternalServiceSettings(ElasticsearchInternalServiceSettings other) { + this.numAllocations = other.numAllocations; + this.numThreads = other.numThreads; + this.modelId = other.modelId; + this.adaptiveAllocationsSettings = other.adaptiveAllocationsSettings; } public ElasticsearchInternalServiceSettings(StreamInput in) throws IOException { - super( - in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS) ? in.readOptionalVInt() : in.readVInt(), - in.readVInt(), - in.readString(), - in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS) - ? in.readOptionalWriteable(AdaptiveAllocationsSettings::new) - : null - ); + if (in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) { + this.numAllocations = in.readOptionalVInt(); + } else { + this.numAllocations = in.readVInt(); + } + this.numThreads = in.readVInt(); + this.modelId = in.readString(); + this.adaptiveAllocationsSettings = in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS) + ? 
in.readOptionalWriteable(AdaptiveAllocationsSettings::new) + : null; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) { + out.writeOptionalVInt(getNumAllocations()); + } else { + out.writeVInt(getNumAllocations()); + } + out.writeVInt(getNumThreads()); + out.writeString(modelId()); + if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) { + out.writeOptionalWriteable(getAdaptiveAllocationsSettings()); + } + } + + @Override + public String modelId() { + return modelId; + } + + public Integer getNumAllocations() { + return numAllocations; + } + + public int getNumThreads() { + return numThreads; + } + + public AdaptiveAllocationsSettings getAdaptiveAllocationsSettings() { + return adaptiveAllocationsSettings; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + addInternalSettingsToXContent(builder, params); + builder.endObject(); + return builder; + } + + protected void addInternalSettingsToXContent(XContentBuilder builder, Params params) throws IOException { + if (numAllocations != null) { + builder.field(NUM_ALLOCATIONS, numAllocations); + } + builder.field(NUM_THREADS, getNumThreads()); + builder.field(MODEL_ID, modelId()); + if (adaptiveAllocationsSettings != null) { + builder.field(ADAPTIVE_ALLOCATIONS, adaptiveAllocationsSettings); + } + } + + @Override + public ToXContentObject getFilteredXContentObject() { + return this; } @Override @@ -84,4 +200,65 @@ public TransportVersion getMinimalSupportedVersion() { return TransportVersions.V_8_13_0; } + public static class Builder { + private Integer numAllocations; + private int numThreads; + private String modelId; + private AdaptiveAllocationsSettings adaptiveAllocationsSettings; + + public ElasticsearchInternalServiceSettings build() { + return new ElasticsearchInternalServiceSettings(numAllocations, numThreads, modelId, adaptiveAllocationsSettings); + } + + public Builder setNumAllocations(Integer numAllocations) { + this.numAllocations = numAllocations; + return this; + } + + public Builder setNumThreads(int numThreads) { + this.numThreads = numThreads; + return this; + } + + public Builder setModelId(String modelId) { + this.modelId = modelId; + return this; + } + + public Builder setAdaptiveAllocationsSettings(AdaptiveAllocationsSettings adaptiveAllocationsSettings) { + this.adaptiveAllocationsSettings = adaptiveAllocationsSettings; + return this; + } + + public String getModelId() { + return modelId; + } + + public Integer getNumAllocations() { + return numAllocations; + } + + public int getNumThreads() { + return numThreads; + } + + public AdaptiveAllocationsSettings getAdaptiveAllocationsSettings() { + return adaptiveAllocationsSettings; + } + } + + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ElasticsearchInternalServiceSettings that = (ElasticsearchInternalServiceSettings) o; + return Objects.equals(numAllocations, that.numAllocations) + && numThreads == that.numThreads + && Objects.equals(modelId, that.modelId) + && Objects.equals(adaptiveAllocationsSettings, that.adaptiveAllocationsSettings); + } + + @Override + public int hashCode() { + return Objects.hash(numAllocations, numThreads, modelId, adaptiveAllocationsSettings); + } } diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchModel.java deleted file mode 100644 index 627e570b24163..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchModel.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.services.elasticsearch; - -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.inference.Model; -import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; -import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; - -public interface ElasticsearchModel { - String getModelId(); - - StartTrainedModelDeploymentAction.Request getStartTrainedModelDeploymentActionRequest(); - - ActionListener getCreateTrainedModelAssignmentActionListener( - Model model, - ActionListener listener - ); -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettings.java index 169082b3d4a9d..2f27fa073b4f0 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettings.java @@ -7,26 +7,16 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.elasticsearch.TransportVersions; import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.core.Nullable; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; -import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; -import org.elasticsearch.xpack.inference.services.ServiceUtils; -import org.elasticsearch.xpack.inference.services.settings.InternalServiceSettings; import java.io.IOException; import java.util.Arrays; -import java.util.List; import java.util.Map; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredPositiveInteger; - public class MultilingualE5SmallInternalServiceSettings extends ElasticsearchInternalServiceSettings { public static final String NAME = "multilingual_e5_small_service_settings"; @@ -34,6 +24,10 @@ public class MultilingualE5SmallInternalServiceSettings extends ElasticsearchInt static final int DIMENSIONS = 384; static final SimilarityMeasure SIMILARITY = SimilarityMeasure.COSINE; + public MultilingualE5SmallInternalServiceSettings(ElasticsearchInternalServiceSettings other) { + super(other); + } + public 
MultilingualE5SmallInternalServiceSettings( Integer numAllocations, int numThreads, @@ -44,14 +38,7 @@ public MultilingualE5SmallInternalServiceSettings( } public MultilingualE5SmallInternalServiceSettings(StreamInput in) throws IOException { - super( - in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS) ? in.readOptionalVInt() : in.readVInt(), - in.readVInt(), - in.readString(), - in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS) - ? in.readOptionalWriteable(AdaptiveAllocationsSettings::new) - : null - ); + super(in); } /** @@ -61,41 +48,13 @@ public MultilingualE5SmallInternalServiceSettings(StreamInput in) throws IOExcep * {@link ValidationException} is thrown. * * @param map Source map containing the config - * @return The {@code MultilingualE5SmallServiceSettings} builder + * @return The builder */ - public static MultilingualE5SmallInternalServiceSettings.Builder fromMap(Map map) { + public static ElasticsearchInternalServiceSettings.Builder fromRequestMap(Map map) { ValidationException validationException = new ValidationException(); - var requestFields = extractRequestFields(map, validationException); - - if (validationException.validationErrors().isEmpty() == false) { - throw validationException; - } - - return createBuilder(requestFields); - } + var baseSettings = ElasticsearchInternalServiceSettings.fromMap(map, validationException); - private static RequestFields extractRequestFields(Map map, ValidationException validationException) { - Integer numAllocations = extractOptionalPositiveInteger( - map, - NUM_ALLOCATIONS, - ModelConfigurations.SERVICE_SETTINGS, - validationException - ); - Integer numThreads = extractRequiredPositiveInteger(map, NUM_THREADS, ModelConfigurations.SERVICE_SETTINGS, validationException); - AdaptiveAllocationsSettings adaptiveAllocationsSettings = ServiceUtils.removeAsAdaptiveAllocationsSettings( - map, - ADAPTIVE_ALLOCATIONS, - validationException - ); - if (numAllocations == null && adaptiveAllocationsSettings == null) { - validationException.addValidationError( - ServiceUtils.missingOneOfSettingsErrorMsg( - List.of(NUM_ALLOCATIONS, ADAPTIVE_ALLOCATIONS), - ModelConfigurations.SERVICE_SETTINGS - ) - ); - } - String modelId = ServiceUtils.removeAsType(map, MODEL_ID, String.class); + String modelId = baseSettings.getModelId(); if (modelId != null) { if (ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_VALID_IDS.contains(modelId) == false) { validationException.addValidationError( @@ -107,38 +66,11 @@ private static RequestFields extractRequestFields(Map map, Valid } } - return new RequestFields(numAllocations, numThreads, modelId, adaptiveAllocationsSettings); - } - - private static MultilingualE5SmallInternalServiceSettings.Builder createBuilder(RequestFields requestFields) { - var builder = new InternalServiceSettings.Builder() { - @Override - public MultilingualE5SmallInternalServiceSettings build() { - return new MultilingualE5SmallInternalServiceSettings( - getNumAllocations(), - getNumThreads(), - getModelId(), - getAdaptiveAllocationsSettings() - ); - } - }; - builder.setNumAllocations(requestFields.numAllocations); - builder.setNumThreads(requestFields.numThreads); - builder.setModelId(requestFields.modelId); - builder.setAdaptiveAllocationsSettings(requestFields.adaptiveAllocationsSettings); - return builder; - } - - private record RequestFields( - @Nullable Integer numAllocations, - @Nullable Integer numThreads, - @Nullable String modelId, - @Nullable 
AdaptiveAllocationsSettings adaptiveAllocationsSettings - ) {} + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } - @Override - public boolean isFragment() { - return super.isFragment(); + return baseSettings; } @Override @@ -146,11 +78,6 @@ public String getWriteableName() { return MultilingualE5SmallInternalServiceSettings.NAME; } - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - } - @Override public SimilarityMeasure similarity() { return SIMILARITY; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallModel.java index b778c4ca1e512..59e5e9c1550c5 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallModel.java @@ -10,15 +10,11 @@ import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.inference.Model; -import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; -import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; -import static org.elasticsearch.xpack.core.ml.inference.assignment.AllocationStatus.State.STARTED; - -public class MultilingualE5SmallModel extends Model implements ElasticsearchModel { +public class MultilingualE5SmallModel extends ElasticsearchInternalModel { public MultilingualE5SmallModel( String inferenceEntityId, @@ -26,7 +22,7 @@ public MultilingualE5SmallModel( String service, MultilingualE5SmallInternalServiceSettings serviceSettings ) { - super(new ModelConfigurations(inferenceEntityId, taskType, service, serviceSettings)); + super(inferenceEntityId, taskType, service, serviceSettings); } @Override @@ -34,22 +30,6 @@ public MultilingualE5SmallInternalServiceSettings getServiceSettings() { return (MultilingualE5SmallInternalServiceSettings) super.getServiceSettings(); } - @Override - public String getModelId() { - return getServiceSettings().modelId(); - } - - @Override - public StartTrainedModelDeploymentAction.Request getStartTrainedModelDeploymentActionRequest() { - var startRequest = new StartTrainedModelDeploymentAction.Request(this.getServiceSettings().modelId(), this.getInferenceEntityId()); - startRequest.setNumberOfAllocations(this.getServiceSettings().getNumAllocations()); - startRequest.setThreadsPerAllocation(this.getServiceSettings().getNumThreads()); - startRequest.setAdaptiveAllocationsSettings(this.getServiceSettings().getAdaptiveAllocationsSettings()); - startRequest.setWaitForState(STARTED); - - return startRequest; - } - @Override public ActionListener getCreateTrainedModelAssignmentActionListener( Model model, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalModel.java index 82c0052e16970..bb668c314649d 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalModel.java @@ -7,11 +7,15 @@ package org.elasticsearch.xpack.inference.services.elser; +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.inference.Model; -import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; +import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalModel; -public class ElserInternalModel extends Model { +public class ElserInternalModel extends ElasticsearchInternalModel { public ElserInternalModel( String inferenceEntityId, @@ -20,7 +24,7 @@ public ElserInternalModel( ElserInternalServiceSettings serviceSettings, ElserMlNodeTaskSettings taskSettings ) { - super(new ModelConfigurations(inferenceEntityId, taskType, service, serviceSettings, taskSettings)); + super(inferenceEntityId, taskType, service, serviceSettings, taskSettings); } @Override @@ -32,4 +36,31 @@ public ElserInternalServiceSettings getServiceSettings() { public ElserMlNodeTaskSettings getTaskSettings() { return (ElserMlNodeTaskSettings) super.getTaskSettings(); } + + @Override + public ActionListener getCreateTrainedModelAssignmentActionListener( + Model model, + ActionListener listener + ) { + return new ActionListener<>() { + @Override + public void onResponse(CreateTrainedModelAssignmentAction.Response response) { + listener.onResponse(Boolean.TRUE); + } + + @Override + public void onFailure(Exception e) { + if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { + listener.onFailure( + new ResourceNotFoundException( + "Could not start the ELSER service as the ELSER model for this platform cannot be found." + + " ELSER needs to be downloaded before it can be started." 
+ ) + ); + return; + } + listener.onFailure(e); + } + }; + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java index f5cdbbde04347..03d7682600e7c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java @@ -10,17 +10,14 @@ package org.elasticsearch.xpack.inference.services.elser; import org.elasticsearch.ElasticsearchStatusException; -import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.client.internal.OriginSettingClient; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.InferenceResults; -import org.elasticsearch.inference.InferenceService; import org.elasticsearch.inference.InferenceServiceExtension; import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.inference.InputType; @@ -28,39 +25,30 @@ import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xpack.core.ClientHelper; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; -import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; -import org.elasticsearch.xpack.core.ml.action.PutTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.action.StopTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; -import org.elasticsearch.xpack.core.ml.inference.TrainedModelInput; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; -import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import org.elasticsearch.xpack.inference.services.ServiceUtils; +import org.elasticsearch.xpack.inference.services.elasticsearch.BaseElasticsearchInternalService; -import java.io.IOException; import java.util.ArrayList; +import java.util.EnumSet; import java.util.List; import java.util.Map; import java.util.Set; import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN; import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; -import static org.elasticsearch.xpack.core.ml.inference.assignment.AllocationStatus.State.STARTED; import static 
org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; -import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.buildInferenceRequest; -public class ElserInternalService implements InferenceService { +public class ElserInternalService extends BaseElasticsearchInternalService { public static final String NAME = "elser"; @@ -77,14 +65,13 @@ public class ElserInternalService implements InferenceService { private static final String OLD_MODEL_ID_FIELD_NAME = "model_version"; - private final OriginSettingClient client; - public ElserInternalService(InferenceServiceExtension.InferenceServiceFactoryContext context) { - this.client = new OriginSettingClient(context.client(), ClientHelper.INFERENCE_ORIGIN); + super(context); } - public boolean isInClusterService() { - return true; + @Override + protected EnumSet supportedTaskTypes() { + return EnumSet.of(TaskType.SPARSE_EMBEDDING); } @Override @@ -97,10 +84,12 @@ public void parseRequestConfig( ) { try { Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); - var serviceSettingsBuilder = ElserInternalServiceSettings.fromMap(serviceSettingsMap); + var serviceSettingsBuilder = ElserInternalServiceSettings.fromRequestMap(serviceSettingsMap); if (serviceSettingsBuilder.getModelId() == null) { - serviceSettingsBuilder.setModelId(selectDefaultModelVersionBasedOnClusterArchitecture(modelArchitectures)); + serviceSettingsBuilder.setModelId( + selectDefaultModelVariantBasedOnClusterArchitecture(modelArchitectures, ELSER_V2_MODEL_LINUX_X86, ELSER_V2_MODEL) + ); } Map taskSettingsMap; @@ -122,7 +111,7 @@ public void parseRequestConfig( inferenceEntityId, taskType, NAME, - (ElserInternalServiceSettings) serviceSettingsBuilder.build(), + new ElserInternalServiceSettings(serviceSettingsBuilder.build()), taskSettings ) ); @@ -131,18 +120,6 @@ public void parseRequestConfig( } } - private static String selectDefaultModelVersionBasedOnClusterArchitecture(Set modelArchitectures) { - // choose a default model ID based on the cluster architecture - boolean homogenous = modelArchitectures.size() == 1; - if (homogenous && modelArchitectures.iterator().next().equals("linux-x86_64")) { - // Use the hardware optimized model - return ELSER_V2_MODEL_LINUX_X86; - } else { - // default to the platform-agnostic model - return ELSER_V2_MODEL; - } - } - @Override public ElserInternalModel parsePersistedConfigWithSecrets( String inferenceEntityId, @@ -164,7 +141,7 @@ public ElserInternalModel parsePersistedConfig(String inferenceEntityId, TaskTyp serviceSettingsMap.put(ElserInternalServiceSettings.MODEL_ID, modelId); } - var serviceSettingsBuilder = ElserInternalServiceSettings.fromMap(serviceSettingsMap); + var serviceSettings = ElserInternalServiceSettings.fromPersistedMap(serviceSettingsMap); Map taskSettingsMap; // task settings are optional @@ -176,85 +153,7 @@ public ElserInternalModel parsePersistedConfig(String inferenceEntityId, TaskTyp var taskSettings = taskSettingsFromMap(taskType, taskSettingsMap); - return new ElserInternalModel( - inferenceEntityId, - taskType, - NAME, - (ElserInternalServiceSettings) serviceSettingsBuilder.build(), - taskSettings - ); - } - - @Override - public void start(Model model, ActionListener listener) { - if (model instanceof ElserInternalModel == false) { - listener.onFailure( - new IllegalStateException( - "Error starting model, [" + 
model.getConfigurations().getInferenceEntityId() + "] is not an ELSER model" - ) - ); - return; - } - - if (model.getConfigurations().getTaskType() != TaskType.SPARSE_EMBEDDING) { - listener.onFailure( - new IllegalStateException(TaskType.unsupportedTaskTypeErrorMsg(model.getConfigurations().getTaskType(), NAME)) - ); - return; - } - - client.execute(StartTrainedModelDeploymentAction.INSTANCE, startDeploymentRequest(model), elserNotDownloadedListener(listener)); - } - - private static StartTrainedModelDeploymentAction.Request startDeploymentRequest(Model model) { - var elserModel = (ElserInternalModel) model; - var serviceSettings = elserModel.getServiceSettings(); - - var startRequest = new StartTrainedModelDeploymentAction.Request( - serviceSettings.modelId(), - model.getConfigurations().getInferenceEntityId() - ); - startRequest.setNumberOfAllocations(serviceSettings.getNumAllocations()); - startRequest.setThreadsPerAllocation(serviceSettings.getNumThreads()); - startRequest.setAdaptiveAllocationsSettings(serviceSettings.getAdaptiveAllocationsSettings()); - startRequest.setWaitForState(STARTED); - return startRequest; - } - - private static ActionListener elserNotDownloadedListener( - ActionListener listener - ) { - return new ActionListener<>() { - @Override - public void onResponse(CreateTrainedModelAssignmentAction.Response response) { - listener.onResponse(Boolean.TRUE); - } - - @Override - public void onFailure(Exception e) { - if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { - listener.onFailure( - new ResourceNotFoundException( - "Could not start the ELSER service as the ELSER model for this platform cannot be found." - + " ELSER needs to be downloaded before it can be started." - ) - ); - return; - } - listener.onFailure(e); - } - }; - } - - @Override - public void stop(String inferenceEntityId, ActionListener listener) { - var request = new StopTrainedModelDeploymentAction.Request(inferenceEntityId); - request.setForce(true); - client.execute( - StopTrainedModelDeploymentAction.INSTANCE, - request, - listener.delegateFailureAndWrap((delegatedResponseListener, response) -> delegatedResponseListener.onResponse(Boolean.TRUE)) - ); + return new ElserInternalModel(inferenceEntityId, taskType, NAME, new ElserInternalServiceSettings(serviceSettings), taskSettings); } @Override @@ -352,32 +251,6 @@ private void checkCompatibleTaskType(TaskType taskType) { } } - @Override - public void putModel(Model model, ActionListener listener) { - if (model instanceof ElserInternalModel == false) { - listener.onFailure( - new IllegalStateException( - "Error starting model, [" + model.getConfigurations().getInferenceEntityId() + "] is not an ELSER model" - ) - ); - return; - } else { - String modelId = ((ElserInternalModel) model).getServiceSettings().modelId(); - var input = new TrainedModelInput(List.of("text_field")); // by convention text_field is used - var config = TrainedModelConfig.builder().setInput(input).setModelId(modelId).validate(true).build(); - PutTrainedModelAction.Request putRequest = new PutTrainedModelAction.Request(config, false, true); - executeAsyncWithOrigin( - client, - INFERENCE_ORIGIN, - PutTrainedModelAction.INSTANCE, - putRequest, - listener.delegateFailure((l, r) -> { - l.onResponse(Boolean.TRUE); - }) - ); - } - } - @Override public void isModelDownloaded(Model model, ActionListener listener) { ActionListener getModelsResponseListener = listener.delegateFailure((delegate, response) -> { @@ -437,9 +310,6 @@ public String name() { return NAME; 
} - @Override - public void close() throws IOException {} - @Override public TransportVersion getMinimalSupportedVersion() { return TransportVersions.V_8_12_0; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettings.java index ee8b457807e77..75797919b3616 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettings.java @@ -11,83 +11,41 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; -import org.elasticsearch.xpack.inference.services.ServiceUtils; -import org.elasticsearch.xpack.inference.services.settings.InternalServiceSettings; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalServiceSettings; import java.io.IOException; -import java.util.List; +import java.util.Arrays; import java.util.Map; -import java.util.Objects; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalString; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredPositiveInteger; +import static org.elasticsearch.xpack.inference.services.elser.ElserInternalService.VALID_ELSER_MODEL_IDS; -public class ElserInternalServiceSettings extends InternalServiceSettings { +public class ElserInternalServiceSettings extends ElasticsearchInternalServiceSettings { public static final String NAME = "elser_mlnode_service_settings"; - /** - * Parse the Elser service setting from map and validate the setting values. - * - * If required setting are missing or the values are invalid an - * {@link ValidationException} is thrown. 
- * - * @param map Source map containing the config - * @return The {@code ElserInternalServiceSettings} - */ - public static ElserInternalServiceSettings.Builder fromMap(Map map) { + public static ElasticsearchInternalServiceSettings.Builder fromRequestMap(Map map) { ValidationException validationException = new ValidationException(); + var baseSettings = ElasticsearchInternalServiceSettings.fromMap(map, validationException); - Integer numAllocations = extractOptionalPositiveInteger( - map, - NUM_ALLOCATIONS, - ModelConfigurations.SERVICE_SETTINGS, - validationException - ); - Integer numThreads = extractRequiredPositiveInteger(map, NUM_THREADS, ModelConfigurations.SERVICE_SETTINGS, validationException); - AdaptiveAllocationsSettings adaptiveAllocationsSettings = ServiceUtils.removeAsAdaptiveAllocationsSettings( - map, - ADAPTIVE_ALLOCATIONS, - validationException - ); - if (numAllocations == null && adaptiveAllocationsSettings == null) { - validationException.addValidationError( - ServiceUtils.missingOneOfSettingsErrorMsg( - List.of(NUM_ALLOCATIONS, ADAPTIVE_ALLOCATIONS), - ModelConfigurations.SERVICE_SETTINGS - ) + String modelId = baseSettings.getModelId(); + if (modelId != null && VALID_ELSER_MODEL_IDS.contains(modelId) == false) { + var ve = new ValidationException(); + ve.addValidationError( + "Unknown ELSER model ID [" + modelId + "]. Valid models are " + Arrays.toString(VALID_ELSER_MODEL_IDS.toArray()) ); - } - String modelId = extractOptionalString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); - - if (modelId != null && ElserInternalService.VALID_ELSER_MODEL_IDS.contains(modelId) == false) { - validationException.addValidationError("unknown ELSER model id [" + modelId + "]"); + throw ve; } if (validationException.validationErrors().isEmpty() == false) { throw validationException; } - var builder = new InternalServiceSettings.Builder() { - @Override - public ElserInternalServiceSettings build() { - return new ElserInternalServiceSettings( - getNumAllocations(), - getNumThreads(), - getModelId(), - getAdaptiveAllocationsSettings() - ); - } - }; - builder.setNumAllocations(numAllocations); - builder.setNumThreads(numThreads); - builder.setAdaptiveAllocationsSettings(adaptiveAllocationsSettings); - builder.setModelId(modelId); - return builder; + return baseSettings; + } + + public ElserInternalServiceSettings(ElasticsearchInternalServiceSettings other) { + super(other); } public ElserInternalServiceSettings( @@ -96,19 +54,11 @@ public ElserInternalServiceSettings( String modelId, AdaptiveAllocationsSettings adaptiveAllocationsSettings ) { - super(numAllocations, numThreads, modelId, adaptiveAllocationsSettings); - Objects.requireNonNull(modelId); + this(new ElasticsearchInternalServiceSettings(numAllocations, numThreads, modelId, adaptiveAllocationsSettings)); } public ElserInternalServiceSettings(StreamInput in) throws IOException { - super( - in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS) ? in.readOptionalVInt() : in.readVInt(), - in.readVInt(), - in.getTransportVersion().onOrAfter(TransportVersions.V_8_11_X) ? in.readString() : ElserInternalService.ELSER_V2_MODEL, - in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS) - ? 
in.readOptionalWriteable(AdaptiveAllocationsSettings::new) - : null - ); + super(in); } @Override @@ -120,36 +70,4 @@ public String getWriteableName() { public TransportVersion getMinimalSupportedVersion() { return TransportVersions.V_8_11_X; } - - @Override - public void writeTo(StreamOutput out) throws IOException { - if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) { - out.writeOptionalVInt(getNumAllocations()); - } else { - out.writeVInt(getNumAllocations()); - } - out.writeVInt(getNumThreads()); - if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_11_X)) { - out.writeString(modelId()); - } - if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) { - out.writeOptionalWriteable(getAdaptiveAllocationsSettings()); - } - } - - @Override - public int hashCode() { - return Objects.hash(NAME, getNumAllocations(), getNumThreads(), modelId(), getAdaptiveAllocationsSettings()); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - ElserInternalServiceSettings that = (ElserInternalServiceSettings) o; - return getNumAllocations() == that.getNumAllocations() - && getNumThreads() == that.getNumThreads() - && Objects.equals(modelId(), that.modelId()) - && Objects.equals(getAdaptiveAllocationsSettings(), that.getAdaptiveAllocationsSettings()); - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/settings/InternalServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/settings/InternalServiceSettings.java deleted file mode 100644 index 18b85b812b167..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/settings/InternalServiceSettings.java +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.inference.services.settings; - -import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.inference.ServiceSettings; -import org.elasticsearch.xcontent.ToXContentObject; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; - -import java.io.IOException; -import java.util.Objects; - -public abstract class InternalServiceSettings implements ServiceSettings { - - public static final String NUM_ALLOCATIONS = "num_allocations"; - public static final String NUM_THREADS = "num_threads"; - public static final String MODEL_ID = "model_id"; - public static final String ADAPTIVE_ALLOCATIONS = "adaptive_allocations"; - - private final Integer numAllocations; - private final int numThreads; - private final String modelId; - private final AdaptiveAllocationsSettings adaptiveAllocationsSettings; - - public InternalServiceSettings( - Integer numAllocations, - int numThreads, - String modelId, - AdaptiveAllocationsSettings adaptiveAllocationsSettings - ) { - this.numAllocations = numAllocations; - this.numThreads = numThreads; - this.modelId = modelId; - this.adaptiveAllocationsSettings = adaptiveAllocationsSettings; - } - - public Integer getNumAllocations() { - return numAllocations; - } - - public int getNumThreads() { - return numThreads; - } - - @Override - public String modelId() { - return modelId; - } - - public AdaptiveAllocationsSettings getAdaptiveAllocationsSettings() { - return adaptiveAllocationsSettings; - } - - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - InternalServiceSettings that = (InternalServiceSettings) o; - return Objects.equals(numAllocations, that.numAllocations) - && numThreads == that.numThreads - && Objects.equals(modelId, that.modelId) - && Objects.equals(adaptiveAllocationsSettings, that.adaptiveAllocationsSettings); - } - - @Override - public int hashCode() { - return Objects.hash(numAllocations, numThreads, modelId, adaptiveAllocationsSettings); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - addXContentFragment(builder, params); - builder.endObject(); - return builder; - } - - public void addXContentFragment(XContentBuilder builder, Params params) throws IOException { - if (numAllocations != null) { - builder.field(NUM_ALLOCATIONS, getNumAllocations()); - } - - builder.field(NUM_THREADS, getNumThreads()); - builder.field(MODEL_ID, modelId()); - - if (adaptiveAllocationsSettings != null) { - builder.field(ADAPTIVE_ALLOCATIONS, getAdaptiveAllocationsSettings()); - } - } - - @Override - public ToXContentObject getFilteredXContentObject() { - return this; - } - - @Override - public boolean isFragment() { - return ServiceSettings.super.isFragment(); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) { - out.writeOptionalVInt(getNumAllocations()); - } else { - out.writeVInt(getNumAllocations()); - } - out.writeVInt(getNumThreads()); - out.writeString(modelId()); - if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) { - out.writeOptionalWriteable(getAdaptiveAllocationsSettings()); - } - } - - public abstract static class Builder { - private Integer 
numAllocations; - private int numThreads; - private String modelId; - private AdaptiveAllocationsSettings adaptiveAllocationsSettings; - - public abstract InternalServiceSettings build(); - - public void setNumAllocations(Integer numAllocations) { - this.numAllocations = numAllocations; - } - - public void setNumThreads(int numThreads) { - this.numThreads = numThreads; - } - - public void setModelId(String modelId) { - this.modelId = modelId; - } - - public void setAdaptiveAllocationsSettings(AdaptiveAllocationsSettings adaptiveAllocationsSettings) { - this.adaptiveAllocationsSettings = adaptiveAllocationsSettings; - } - - public String getModelId() { - return modelId; - } - - public Integer getNumAllocations() { - return numAllocations; - } - - public int getNumThreads() { - return numThreads; - } - - public AdaptiveAllocationsSettings getAdaptiveAllocationsSettings() { - return adaptiveAllocationsSettings; - } - } -} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalTextEmbeddingServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalTextEmbeddingServiceSettingsTests.java index c06aad881d2ab..ebb9c964e4c9a 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalTextEmbeddingServiceSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandInternalTextEmbeddingServiceSettingsTests.java @@ -23,8 +23,8 @@ import java.util.Map; import static org.elasticsearch.xpack.inference.services.ServiceFields.ELEMENT_TYPE; -import static org.elasticsearch.xpack.inference.services.settings.InternalServiceSettings.NUM_ALLOCATIONS; -import static org.elasticsearch.xpack.inference.services.settings.InternalServiceSettings.NUM_THREADS; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalServiceSettings.NUM_THREADS; import static org.hamcrest.Matchers.is; public class CustomElandInternalTextEmbeddingServiceSettingsTests extends AbstractWireSerializingTestCase< diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettingsTests.java new file mode 100644 index 0000000000000..41afef88d22c6 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettingsTests.java @@ -0,0 +1,132 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.elasticsearch; + +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; +import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; +import org.elasticsearch.xpack.inference.services.elser.ElserInternalServiceSettings; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.Matchers.containsString; + +public class ElasticsearchInternalServiceSettingsTests extends AbstractWireSerializingTestCase { + + public static ElasticsearchInternalServiceSettings validInstance(String modelId) { + boolean useAdaptive = randomBoolean(); + if (useAdaptive) { + var adaptive = new AdaptiveAllocationsSettings(true, 1, randomIntBetween(2, 8)); + return new ElasticsearchInternalServiceSettings(randomBoolean() ? 1 : null, randomIntBetween(1, 16), modelId, adaptive); + } else { + return new ElasticsearchInternalServiceSettings(randomIntBetween(1, 10), randomIntBetween(1, 16), modelId, null); + } + } + + @Override + protected Writeable.Reader instanceReader() { + return ElasticsearchInternalServiceSettings::new; + } + + @Override + protected ElasticsearchInternalServiceSettings createTestInstance() { + return validInstance("my-model"); + } + + @Override + protected ElasticsearchInternalServiceSettings mutateInstance(ElasticsearchInternalServiceSettings instance) throws IOException { + return switch (randomIntBetween(0, 2)) { + case 0 -> new ElserInternalServiceSettings( + new ElasticsearchInternalServiceSettings( + instance.getNumAllocations() == null ? 1 : instance.getNumAllocations() + 1, + instance.getNumThreads(), + instance.modelId(), + instance.getAdaptiveAllocationsSettings() + ) + ); + case 1 -> new ElserInternalServiceSettings( + new ElasticsearchInternalServiceSettings( + instance.getNumAllocations(), + instance.getNumThreads() + 1, + instance.modelId(), + instance.getAdaptiveAllocationsSettings() + ) + ); + case 2 -> new ElserInternalServiceSettings( + new ElasticsearchInternalServiceSettings( + instance.getNumAllocations(), + instance.getNumThreads(), + instance.modelId() + "-bar", + instance.getAdaptiveAllocationsSettings() + ) + ); + default -> throw new IllegalStateException(); + }; + } + + public void testFromRequestMap_NoDefaultModel() { + var serviceSettingsBuilder = ElasticsearchInternalServiceSettings.fromRequestMap( + new HashMap<>( + Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4) + ) + ); + assertNull(serviceSettingsBuilder.getModelId()); + } + + public void testFromMap() { + var serviceSettings = ElasticsearchInternalServiceSettings.fromRequestMap( + new HashMap<>( + Map.of( + ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, + 1, + ElasticsearchInternalServiceSettings.NUM_THREADS, + 4, + ElasticsearchInternalServiceSettings.MODEL_ID, + ".elser_model_1" + ) + ) + ).build(); + assertEquals(new ElasticsearchInternalServiceSettings(1, 4, ".elser_model_1", null), serviceSettings); + } + + public void testFromMapMissingOptions() { + var e = expectThrows( + ValidationException.class, + () -> ElasticsearchInternalServiceSettings.fromRequestMap( + new HashMap<>(Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 1)) + ) + ); + + assertThat(e.getMessage(), containsString("[service_settings] does not contain the required setting [num_threads]")); + + e = 
expectThrows( + ValidationException.class, + () -> ElasticsearchInternalServiceSettings.fromRequestMap( + new HashMap<>(Map.of(ElasticsearchInternalServiceSettings.NUM_THREADS, 1)) + ) + ); + + assertThat( + e.getMessage(), + containsString("[service_settings] does not contain one of the required settings [num_allocations, adaptive_allocations]") + ); + } + + public void testFromMapInvalidSettings() { + var settingsMap = new HashMap( + Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 0, ElasticsearchInternalServiceSettings.NUM_THREADS, -1) + ); + var e = expectThrows(ValidationException.class, () -> ElasticsearchInternalServiceSettings.fromRequestMap(settingsMap)); + + assertThat(e.getMessage(), containsString("Invalid value [0]. [num_allocations] must be a positive integer")); + assertThat(e.getMessage(), containsString("Invalid value [-1]. [num_threads] must be a positive integer")); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index ad1910cb9fc0a..e6fd725a50198 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -46,7 +46,6 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; import org.elasticsearch.xpack.core.utils.FloatConversionUtils; import org.elasticsearch.xpack.inference.services.ServiceFields; -import org.elasticsearch.xpack.inference.services.settings.InternalServiceSettings; import org.junit.After; import org.junit.Before; import org.mockito.ArgumentCaptor; @@ -125,7 +124,7 @@ public void testParseRequestConfig() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + ElasticsearchInternalServiceSettings.MODEL_ID, ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID ) ) @@ -161,7 +160,7 @@ public void testParseRequestConfig() { ActionListener modelListener = ActionListener.wrap( model -> fail("Model parsing should have failed"), - e -> assertThat(e, instanceOf(IllegalArgumentException.class)) + e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) ); service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); @@ -179,7 +178,7 @@ public void testParseRequestConfig() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + ElasticsearchInternalServiceSettings.MODEL_ID, ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, // we can't directly test the eland case until we mock // the threadpool within the client "not_a_valid_service_setting", @@ -208,7 +207,7 @@ public void testParseRequestConfig() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + ElasticsearchInternalServiceSettings.MODEL_ID, ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, // we can't directly test the eland case until we mock // the threadpool within the client "extra_setting_that_should_not_be_here", @@ -237,7 +236,7 @@ public void testParseRequestConfig() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + 
ElasticsearchInternalServiceSettings.MODEL_ID, ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID // we can't directly test the eland case until we mock // the threadpool within the client ) @@ -279,7 +278,7 @@ public void testParseRequestConfig_Rerank() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + ElasticsearchInternalServiceSettings.MODEL_ID, "foo" ) ) @@ -326,7 +325,7 @@ public void testParseRequestConfig_Rerank_DefaultTaskSettings() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + ElasticsearchInternalServiceSettings.MODEL_ID, "foo" ) ) @@ -390,7 +389,7 @@ public void testParsePersistedConfig() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + ElasticsearchInternalServiceSettings.MODEL_ID, "invalid" ) ) @@ -420,7 +419,7 @@ public void testParsePersistedConfig() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + ElasticsearchInternalServiceSettings.MODEL_ID, ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, ServiceFields.DIMENSIONS, 1 @@ -641,12 +640,12 @@ public void testParsePersistedConfig_Rerank() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + ElasticsearchInternalServiceSettings.MODEL_ID, "foo" ) ) ); - settings.put(InternalServiceSettings.MODEL_ID, "foo"); + settings.put(ElasticsearchInternalServiceSettings.MODEL_ID, "foo"); var returnDocs = randomBoolean(); settings.put( ModelConfigurations.TASK_SETTINGS, @@ -670,12 +669,12 @@ public void testParsePersistedConfig_Rerank() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + ElasticsearchInternalServiceSettings.MODEL_ID, "foo" ) ) ); - settings.put(InternalServiceSettings.MODEL_ID, "foo"); + settings.put(ElasticsearchInternalServiceSettings.MODEL_ID, "foo"); var model = service.parsePersistedConfig(randomInferenceEntityId, TaskType.RERANK, settings); assertThat(model.getTaskSettings(), instanceOf(CustomElandRerankTaskSettings.class)); @@ -706,7 +705,7 @@ public void testParseRequestConfigEland_PreservesTaskType() { 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4, - InternalServiceSettings.MODEL_ID, + ElasticsearchInternalServiceSettings.MODEL_ID, "custom-model" ) ) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettingsTests.java index b0218905040ae..f685eb3732a89 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettingsTests.java @@ -22,49 +22,17 @@ public class MultilingualE5SmallInternalServiceSettingsTests extends AbstractWir public static MultilingualE5SmallInternalServiceSettings createRandom() { return new MultilingualE5SmallInternalServiceSettings( - randomIntBetween(1, 4), - randomIntBetween(1, 4), - randomFrom(ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_VALID_IDS), - null - ); - } - - public void testFromMap_DefaultModelVersion() { - var serviceSettingsBuilder = MultilingualE5SmallInternalServiceSettings.fromMap( - new HashMap<>( 
- Map.of( - MultilingualE5SmallInternalServiceSettings.NUM_ALLOCATIONS, - 1, - MultilingualE5SmallInternalServiceSettings.NUM_THREADS, - 4 - ) + ElasticsearchInternalServiceSettingsTests.validInstance( + randomFrom(ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_VALID_IDS) ) ); - assertNull(serviceSettingsBuilder.getModelId()); - } - - public void testFromMap() { - String randomModelVariant = randomFrom(ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_VALID_IDS); - var serviceSettings = MultilingualE5SmallInternalServiceSettings.fromMap( - new HashMap<>( - Map.of( - MultilingualE5SmallInternalServiceSettings.NUM_ALLOCATIONS, - 1, - MultilingualE5SmallInternalServiceSettings.NUM_THREADS, - 4, - MultilingualE5SmallInternalServiceSettings.MODEL_ID, - randomModelVariant - ) - ) - ).build(); - assertEquals(new MultilingualE5SmallInternalServiceSettings(1, 4, randomModelVariant, null), serviceSettings); } public void testFromMapInvalidVersion() { String randomModelVariant = randomAlphaOfLength(10); var e = expectThrows( ValidationException.class, - () -> MultilingualE5SmallInternalServiceSettings.fromMap( + () -> MultilingualE5SmallInternalServiceSettings.fromRequestMap( new HashMap<>( Map.of( MultilingualE5SmallInternalServiceSettings.NUM_ALLOCATIONS, @@ -83,7 +51,7 @@ public void testFromMapInvalidVersion() { public void testFromMapMissingOptions() { var e = expectThrows( ValidationException.class, - () -> MultilingualE5SmallInternalServiceSettings.fromMap( + () -> MultilingualE5SmallInternalServiceSettings.fromRequestMap( new HashMap<>(Map.of(MultilingualE5SmallInternalServiceSettings.NUM_ALLOCATIONS, 1)) ) ); @@ -92,7 +60,7 @@ public void testFromMapMissingOptions() { e = expectThrows( ValidationException.class, - () -> MultilingualE5SmallInternalServiceSettings.fromMap( + () -> MultilingualE5SmallInternalServiceSettings.fromRequestMap( new HashMap<>(Map.of(MultilingualE5SmallInternalServiceSettings.NUM_THREADS, 1)) ) ); @@ -112,7 +80,7 @@ public void testFromMapInvalidSettings() { -1 ) ); - var e = expectThrows(ValidationException.class, () -> MultilingualE5SmallInternalServiceSettings.fromMap(settingsMap)); + var e = expectThrows(ValidationException.class, () -> MultilingualE5SmallInternalServiceSettings.fromRequestMap(settingsMap)); assertThat(e.getMessage(), containsString("Invalid value [0]. [num_allocations] must be a positive integer")); assertThat(e.getMessage(), containsString("Invalid value [-1]. [num_threads] must be a positive integer")); @@ -132,7 +100,7 @@ protected MultilingualE5SmallInternalServiceSettings createTestInstance() { protected MultilingualE5SmallInternalServiceSettings mutateInstance(MultilingualE5SmallInternalServiceSettings instance) { return switch (randomIntBetween(0, 2)) { case 0 -> new MultilingualE5SmallInternalServiceSettings( - instance.getNumAllocations() + 1, + instance.getNumAllocations() == null ? 
1 : instance.getNumAllocations() + 1, instance.getNumThreads(), instance.modelId(), null diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettingsTests.java index e4a7f4481e025..ec753b9bec887 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettingsTests.java @@ -8,112 +8,35 @@ package org.elasticsearch.xpack.inference.services.elser; import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.test.AbstractWireSerializingTestCase; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalServiceSettings; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalServiceSettingsTests; import java.io.IOException; -import java.util.HashMap; import java.util.HashSet; -import java.util.Map; - -import static org.hamcrest.Matchers.containsString; public class ElserInternalServiceSettingsTests extends AbstractWireSerializingTestCase { public static ElserInternalServiceSettings createRandom() { return new ElserInternalServiceSettings( - randomIntBetween(1, 4), - randomIntBetween(1, 2), - randomFrom(ElserInternalService.VALID_ELSER_MODEL_IDS), - null - ); - } - - public void testFromMap_DefaultModelVersion() { - var serviceSettingsBuilder = ElserInternalServiceSettings.fromMap( - new HashMap<>(Map.of(ElserInternalServiceSettings.NUM_ALLOCATIONS, 1, ElserInternalServiceSettings.NUM_THREADS, 4)) - ); - assertNull(serviceSettingsBuilder.getModelId()); - } - - public void testFromMap() { - var serviceSettings = ElserInternalServiceSettings.fromMap( - new HashMap<>( - Map.of( - ElserInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElserInternalServiceSettings.NUM_THREADS, - 4, - ElserInternalServiceSettings.MODEL_ID, - ".elser_model_1" - ) - ) - ).build(); - assertEquals(new ElserInternalServiceSettings(1, 4, ".elser_model_1", null), serviceSettings); - } - - public void testFromMapInvalidVersion() { - var e = expectThrows( - ValidationException.class, - () -> ElserInternalServiceSettings.fromMap( - new HashMap<>( - Map.of( - ElserInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElserInternalServiceSettings.NUM_THREADS, - 4, - "model_id", - ".elser_model_27" - ) - ) - ) - ); - assertThat(e.getMessage(), containsString("unknown ELSER model id [.elser_model_27]")); - } - - public void testFromMapMissingOptions() { - var e = expectThrows( - ValidationException.class, - () -> ElserInternalServiceSettings.fromMap(new HashMap<>(Map.of(ElserInternalServiceSettings.NUM_ALLOCATIONS, 1))) - ); - - assertThat(e.getMessage(), containsString("[service_settings] does not contain the required setting [num_threads]")); - - e = expectThrows( - ValidationException.class, - () -> ElserInternalServiceSettings.fromMap(new HashMap<>(Map.of(ElserInternalServiceSettings.NUM_THREADS, 1))) - ); - - assertThat( - e.getMessage(), - containsString("[service_settings] does not contain one of the required settings [num_allocations, adaptive_allocations]") + ElasticsearchInternalServiceSettingsTests.validInstance(randomFrom(ElserInternalService.VALID_ELSER_MODEL_IDS)) ); } 
    public void testBwcWrite() throws IOException {
        {
-            var settings = new ElserInternalServiceSettings(1, 1, ".elser_model_1", null);
+            var settings = new ElserInternalServiceSettings(new ElasticsearchInternalServiceSettings(1, 1, ".elser_model_1", null));
            var copy = copyInstance(settings, TransportVersions.V_8_12_0);
            assertEquals(settings, copy);
        }
        {
-            var settings = new ElserInternalServiceSettings(1, 1, ".elser_model_1", null);
+            var settings = new ElserInternalServiceSettings(new ElasticsearchInternalServiceSettings(1, 1, ".elser_model_1", null));
            var copy = copyInstance(settings, TransportVersions.V_8_11_X);
            assertEquals(settings, copy);
        }
    }

-    public void testFromMapInvalidSettings() {
-        var settingsMap = new HashMap(
-            Map.of(ElserInternalServiceSettings.NUM_ALLOCATIONS, 0, ElserInternalServiceSettings.NUM_THREADS, -1)
-        );
-        var e = expectThrows(ValidationException.class, () -> ElserInternalServiceSettings.fromMap(settingsMap));
-
-        assertThat(e.getMessage(), containsString("Invalid value [0]. [num_allocations] must be a positive integer"));
-        assertThat(e.getMessage(), containsString("Invalid value [-1]. [num_threads] must be a positive integer"));
-    }
-
    @Override
    protected Writeable.Reader instanceReader() {
        return ElserInternalServiceSettings::new;
@@ -128,25 +51,31 @@ protected ElserInternalServiceSettings createTestInstance() {
    protected ElserInternalServiceSettings mutateInstance(ElserInternalServiceSettings instance) {
        return switch (randomIntBetween(0, 2)) {
            case 0 -> new ElserInternalServiceSettings(
-                instance.getNumAllocations() + 1,
-                instance.getNumThreads(),
-                instance.modelId(),
-                null
+                new ElasticsearchInternalServiceSettings(
+                    instance.getNumAllocations() == null ? 1 : instance.getNumAllocations() + 1,
+                    instance.getNumThreads(),
+                    instance.modelId(),
+                    null
+                )
            );
            case 1 -> new ElserInternalServiceSettings(
-                instance.getNumAllocations(),
-                instance.getNumThreads() + 1,
-                instance.modelId(),
-                null
+                new ElasticsearchInternalServiceSettings(
+                    instance.getNumAllocations(),
+                    instance.getNumThreads() + 1,
+                    instance.modelId(),
+                    null
+                )
            );
            case 2 -> {
                var versions = new HashSet<>(ElserInternalService.VALID_ELSER_MODEL_IDS);
                versions.remove(instance.modelId());
                yield new ElserInternalServiceSettings(
-                    instance.getNumAllocations(),
-                    instance.getNumThreads(),
-                    versions.iterator().next(),
-                    null
+                    new ElasticsearchInternalServiceSettings(
+                        instance.getNumAllocations(),
+                        instance.getNumThreads(),
+                        versions.iterator().next(),
+                        null
+                    )
                );
            }
            default -> throw new IllegalStateException();

From ede4038827322e1ab26eb21795d9f14d79420806 Mon Sep 17 00:00:00 2001
From: Simon Cooper
Date: Wed, 17 Jul 2024 12:00:06 +0100
Subject: [PATCH 14/65] Fix FileSettingsUpgradeIT to not create irrelevant
 clusters at all (#110963)

---
 muted-tests.yml                          |  2 --
 .../upgrades/FileSettingsUpgradeIT.java  | 15 +++++++--------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/muted-tests.yml b/muted-tests.yml
index fe6625de3b030..13f9884c251f0 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -85,8 +85,6 @@ tests:
 - class: org.elasticsearch.xpack.searchablesnapshots.cache.common.CacheFileTests
   method: testCacheFileCreatedAsSparseFile
   issue: https://github.com/elastic/elasticsearch/issues/110801
-- class: org.elasticsearch.upgrades.FileSettingsUpgradeIT
-  issue: https://github.com/elastic/elasticsearch/issues/110884
 - class: "org.elasticsearch.xpack.watcher.test.integration.HistoryIntegrationTests"
   issue: "https://github.com/elastic/elasticsearch/issues/110885"
   method: "testPayloadInputWithDotsInFieldNameWorks"
"testPayloadInputWithDotsInFieldNameWorks" diff --git a/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/FileSettingsUpgradeIT.java b/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/FileSettingsUpgradeIT.java index c80911fe5fbcf..8ffaec5506f1d 100644 --- a/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/FileSettingsUpgradeIT.java +++ b/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/FileSettingsUpgradeIT.java @@ -15,11 +15,10 @@ import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.test.cluster.FeatureFlag; -import org.elasticsearch.test.cluster.local.DefaultLocalClusterSpecBuilder; import org.elasticsearch.test.cluster.local.distribution.DistributionType; import org.elasticsearch.test.cluster.util.Version; import org.elasticsearch.test.cluster.util.resource.Resource; -import org.junit.BeforeClass; +import org.elasticsearch.test.junit.RunnableTestRuleAdapter; import org.junit.ClassRule; import org.junit.rules.RuleChain; import org.junit.rules.TemporaryFolder; @@ -33,10 +32,9 @@ public class FileSettingsUpgradeIT extends ParameterizedRollingUpgradeTestCase { - @BeforeClass - public static void checkVersion() { - assumeTrue("Only valid when upgrading from pre-file settings", getOldClusterTestVersion().before(new Version(8, 4, 0))); - } + private static final RunnableTestRuleAdapter versionLimit = new RunnableTestRuleAdapter( + () -> assumeTrue("Only valid when upgrading from pre-file settings", getOldClusterTestVersion().before(new Version(8, 4, 0))) + ); private static final String settingsJSON = """ { @@ -53,7 +51,8 @@ public static void checkVersion() { private static final TemporaryFolder repoDirectory = new TemporaryFolder(); - private static final ElasticsearchCluster cluster = new DefaultLocalClusterSpecBuilder().distribution(DistributionType.DEFAULT) + private static final ElasticsearchCluster cluster = ElasticsearchCluster.local() + .distribution(DistributionType.DEFAULT) .version(getOldClusterTestVersion()) .nodes(NODE_NUM) .setting("path.repo", new Supplier<>() { @@ -69,7 +68,7 @@ public String get() { .build(); @ClassRule - public static TestRule ruleChain = RuleChain.outerRule(repoDirectory).around(cluster); + public static TestRule ruleChain = RuleChain.outerRule(versionLimit).around(repoDirectory).around(cluster); public FileSettingsUpgradeIT(@Name("upgradedNodes") int upgradedNodes) { super(upgradedNodes); From e412f28624bced5ce97fe312e082fbea635358cc Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Wed, 17 Jul 2024 13:18:27 +0200 Subject: [PATCH 15/65] [ESQL] Make query wrapped by SingleValueQuery cacheable (#110116) This commit simplifies SingleValueQuery by introducing a new lucene query called SingleValueMatchQuery. This query iterates over doc values and return only the fields that are single-valued. 
--- docs/changelog/110116.yaml | 5 + .../querydsl/query/SingleValueMatchQuery.java | 344 +++++++++++ .../esql/querydsl/query/SingleValueQuery.java | 569 +----------------- .../query/SingleValueMathQueryTests.java | 203 +++++++ .../SingleValueQuerySerializationTests.java | 4 +- .../querydsl/query/SingleValueQueryTests.java | 149 +---- 6 files changed, 575 insertions(+), 699 deletions(-) create mode 100644 docs/changelog/110116.yaml create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMatchQuery.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMathQueryTests.java diff --git a/docs/changelog/110116.yaml b/docs/changelog/110116.yaml new file mode 100644 index 0000000000000..9c309b8b80311 --- /dev/null +++ b/docs/changelog/110116.yaml @@ -0,0 +1,5 @@ +pr: 110116 +summary: "[ESQL] Make query wrapped by `SingleValueQuery` cacheable" +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMatchQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMatchQuery.java new file mode 100644 index 0000000000000..ac75a58baaa06 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMatchQuery.java @@ -0,0 +1,344 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.querydsl.query; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.elasticsearch.index.fielddata.FieldData; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.LeafFieldData; +import org.elasticsearch.index.fielddata.LeafNumericFieldData; +import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData; +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.xpack.esql.expression.function.Warnings; + +import java.io.IOException; +import java.util.Objects; + +/** + * Finds all fields with a single-value. If a field has a multi-value, it emits a {@link Warnings}. + */ +final class SingleValueMatchQuery extends Query { + + /** + * The estimated number of comparisons to check if a {@link DocValues} + * has more than one value. There isn't a good way to get that number out of + * {@link DocValues} so this is a guess. 
+ */ + private static final int MULTI_VALUE_MATCH_COST = 10; + private static final IllegalArgumentException MULTI_VALUE_EXCEPTION = new IllegalArgumentException( + "single-value function encountered multi-value" + ); + private final IndexFieldData fieldData; + private final Warnings warnings; + + SingleValueMatchQuery(IndexFieldData fieldData, Warnings warnings) { + this.fieldData = fieldData; + this.warnings = warnings; + } + + @Override + public String toString(String field) { + StringBuilder builder = new StringBuilder("single_value_match("); + if (false == this.fieldData.getFieldName().equals(field)) { + builder.append(this.fieldData.getFieldName()); + } + return builder.append(")").toString(); + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) { + return new ConstantScoreWeight(this, boost) { + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + final ScorerSupplier scorerSupplier = scorerSupplier(context); + if (scorerSupplier == null) { + return null; + } + return scorerSupplier.get(Long.MAX_VALUE); + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + final LeafFieldData lfd = fieldData.load(context); + if (lfd == null) { + return null; + } + /* + * SortedBinaryDocValues are available for most fields, but they + * are made available by eagerly converting non-bytes values to + * utf-8 strings. The eager conversion is quite expensive. So + * we specialize on numeric fields and fields with ordinals to + * avoid that expense in at least that case. + * + * Also! Lucene's FieldExistsQuery only needs one scorer that can + * use all the docs values iterators at DocIdSetIterators. We + * can't do that because we need the check the number of fields. 
+ */ + if (lfd instanceof LeafNumericFieldData n) { + return scorerSupplier(context, n.getLongValues(), this, boost, scoreMode); + } + if (lfd instanceof LeafOrdinalsFieldData o) { + return scorerSupplier(context, o.getOrdinalsValues(), this, boost, scoreMode); + } + return scorerSupplier(context, lfd.getBytesValues(), this, boost, scoreMode); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + // don't cache so we can emit warnings + return false; + } + + private ScorerSupplier scorerSupplier( + LeafReaderContext context, + SortedNumericDocValues sortedNumerics, + Weight weight, + float boost, + ScoreMode scoreMode + ) throws IOException { + final int maxDoc = context.reader().maxDoc(); + if (DocValues.unwrapSingleton(sortedNumerics) != null) { + // check for dense field + final PointValues points = context.reader().getPointValues(fieldData.getFieldName()); + if (points != null && points.getDocCount() == maxDoc) { + return new DocIdSetIteratorScorerSupplier(weight, boost, scoreMode, DocIdSetIterator.all(maxDoc)); + } else { + return new PredicateScorerSupplier( + weight, + boost, + scoreMode, + maxDoc, + MULTI_VALUE_MATCH_COST, + sortedNumerics::advanceExact + ); + } + } + final CheckedIntPredicate predicate = doc -> { + if (false == sortedNumerics.advanceExact(doc)) { + return false; + } + if (sortedNumerics.docValueCount() != 1) { + warnings.registerException(MULTI_VALUE_EXCEPTION); + return false; + } + return true; + }; + return new PredicateScorerSupplier(weight, boost, scoreMode, maxDoc, MULTI_VALUE_MATCH_COST, predicate); + } + + private ScorerSupplier scorerSupplier( + LeafReaderContext context, + SortedSetDocValues sortedSetDocValues, + Weight weight, + float boost, + ScoreMode scoreMode + ) throws IOException { + final int maxDoc = context.reader().maxDoc(); + if (DocValues.unwrapSingleton(sortedSetDocValues) != null) { + // check for dense field + final Terms terms = context.reader().terms(fieldData.getFieldName()); + if (terms != null && terms.getDocCount() == maxDoc) { + return new DocIdSetIteratorScorerSupplier(weight, boost, scoreMode, DocIdSetIterator.all(maxDoc)); + } else { + return new PredicateScorerSupplier( + weight, + boost, + scoreMode, + maxDoc, + MULTI_VALUE_MATCH_COST, + sortedSetDocValues::advanceExact + ); + } + } + final CheckedIntPredicate predicate = doc -> { + if (false == sortedSetDocValues.advanceExact(doc)) { + return false; + } + if (sortedSetDocValues.docValueCount() != 1) { + warnings.registerException(MULTI_VALUE_EXCEPTION); + return false; + } + return true; + }; + return new PredicateScorerSupplier(weight, boost, scoreMode, maxDoc, MULTI_VALUE_MATCH_COST, predicate); + } + + private ScorerSupplier scorerSupplier( + LeafReaderContext context, + SortedBinaryDocValues sortedBinaryDocValues, + Weight weight, + float boost, + ScoreMode scoreMode + ) { + final int maxDoc = context.reader().maxDoc(); + if (FieldData.unwrapSingleton(sortedBinaryDocValues) != null) { + return new PredicateScorerSupplier( + weight, + boost, + scoreMode, + maxDoc, + MULTI_VALUE_MATCH_COST, + sortedBinaryDocValues::advanceExact + ); + } + final CheckedIntPredicate predicate = doc -> { + if (false == sortedBinaryDocValues.advanceExact(doc)) { + return false; + } + if (sortedBinaryDocValues.docValueCount() != 1) { + warnings.registerException(MULTI_VALUE_EXCEPTION); + return false; + } + return true; + }; + return new PredicateScorerSupplier(weight, boost, scoreMode, maxDoc, MULTI_VALUE_MATCH_COST, predicate); + } + }; + } + + @Override + public Query 
rewrite(IndexSearcher indexSearcher) throws IOException { + for (LeafReaderContext context : indexSearcher.getIndexReader().leaves()) { + final LeafFieldData lfd = fieldData.load(context); + if (lfd instanceof LeafNumericFieldData) { + final PointValues pointValues = context.reader().getPointValues(fieldData.getFieldName()); + if (pointValues == null + || pointValues.getDocCount() != context.reader().maxDoc() + || pointValues.size() != pointValues.getDocCount()) { + return super.rewrite(indexSearcher); + } + } else if (lfd instanceof LeafOrdinalsFieldData) { + final Terms terms = context.reader().terms(fieldData.getFieldName()); + if (terms == null || terms.getDocCount() != context.reader().maxDoc() || terms.size() != terms.getDocCount()) { + return super.rewrite(indexSearcher); + } + } else { + return super.rewrite(indexSearcher); + } + } + return new MatchAllDocsQuery(); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(fieldData.getFieldName())) { + visitor.visitLeaf(this); + } + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (obj == null || obj.getClass() != getClass()) { + return false; + } + final SingleValueMatchQuery other = (SingleValueMatchQuery) obj; + return fieldData.getFieldName().equals(other.fieldData.getFieldName()); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), fieldData.getFieldName()); + } + + private static class DocIdSetIteratorScorerSupplier extends ScorerSupplier { + + private final Weight weight; + private final float score; + private final ScoreMode scoreMode; + private final DocIdSetIterator docIdSetIterator; + + private DocIdSetIteratorScorerSupplier(Weight weight, float score, ScoreMode scoreMode, DocIdSetIterator docIdSetIterator) { + this.weight = weight; + this.score = score; + this.scoreMode = scoreMode; + this.docIdSetIterator = docIdSetIterator; + } + + @Override + public Scorer get(long leadCost) { + return new ConstantScoreScorer(weight, score, scoreMode, docIdSetIterator); + } + + @Override + public long cost() { + return docIdSetIterator.cost(); + } + } + + private static class PredicateScorerSupplier extends ScorerSupplier { + + private final Weight weight; + private final float score; + private final ScoreMode scoreMode; + private final int maxDoc; + private final int matchCost; + private final CheckedIntPredicate predicate; + + private PredicateScorerSupplier( + Weight weight, + float score, + ScoreMode scoreMode, + int maxDoc, + int matchCost, + CheckedIntPredicate predicate + ) { + this.weight = weight; + this.score = score; + this.scoreMode = scoreMode; + this.maxDoc = maxDoc; + this.matchCost = matchCost; + this.predicate = predicate; + } + + @Override + public Scorer get(long leadCost) { + TwoPhaseIterator iterator = new TwoPhaseIterator(DocIdSetIterator.all(maxDoc)) { + @Override + public boolean matches() throws IOException { + return predicate.test(approximation.docID()); + } + + @Override + public float matchCost() { + return matchCost; + } + }; + return new ConstantScoreScorer(weight, score, scoreMode, iterator); + } + + @Override + public long cost() { + return maxDoc; + } + } + + @FunctionalInterface + private interface CheckedIntPredicate { + boolean test(int doc) throws IOException; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuery.java index 
4cd51b676fe89..07db69e6c5e51 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuery.java @@ -7,32 +7,15 @@ package org.elasticsearch.xpack.esql.querydsl.query; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PointValues; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.index.Terms; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.QueryVisitor; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.TwoPhaseIterator; -import org.apache.lucene.search.Weight; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.index.fielddata.IndexFieldData; -import org.elasticsearch.index.fielddata.LeafFieldData; -import org.elasticsearch.index.fielddata.LeafNumericFieldData; -import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData; -import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.MatchNoneQueryBuilder; @@ -71,8 +54,6 @@ public class SingleValueQuery extends Query { Builder::new ); - public static final String MULTI_VALUE_WARNING = "single-value function encountered multi-value"; - private final Query next; private final String field; @@ -84,7 +65,7 @@ public SingleValueQuery(Query next, String field) { @Override public Builder asBuilder() { - return new Builder(next.asBuilder(), field, new Stats(), next.source()); + return new Builder(next.asBuilder(), field, next.source()); } @Override @@ -114,13 +95,11 @@ public int hashCode() { public static class Builder extends AbstractQueryBuilder { private final QueryBuilder next; private final String field; - private final Stats stats; private final Source source; - Builder(QueryBuilder next, String field, Stats stats, Source source) { + Builder(QueryBuilder next, String field, Source source) { this.next = next; this.field = field; - this.stats = stats; this.source = source; } @@ -128,7 +107,6 @@ public static class Builder extends AbstractQueryBuilder { super(in); this.next = in.readNamedWriteable(QueryBuilder.class); this.field = in.readString(); - this.stats = new Stats(); if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_12_0)) { this.source = readSource(in); } else { @@ -181,28 +159,33 @@ public TransportVersion getMinimalSupportedVersion() { protected org.apache.lucene.search.Query doToQuery(SearchExecutionContext context) throws IOException { MappedFieldType ft = context.getFieldType(field); if (ft == null) { - stats.missingField++; return new MatchNoDocsQuery("missing field [" + field + "]"); } - return 
new LuceneQuery( - next.toQuery(context), + SingleValueMatchQuery singleValueQuery = new SingleValueMatchQuery( context.getForField(ft, MappedFieldType.FielddataOperation.SEARCH), - stats, new Warnings(source) ); + org.apache.lucene.search.Query rewrite = singleValueQuery.rewrite(context.searcher()); + if (rewrite instanceof MatchAllDocsQuery) { + // nothing to filter + return next.toQuery(context); + } + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(next.toQuery(context), BooleanClause.Occur.FILTER); + builder.add(rewrite, BooleanClause.Occur.FILTER); + return builder.build(); } @Override protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException { QueryBuilder rewritten = next.rewrite(queryRewriteContext); if (rewritten instanceof MatchNoneQueryBuilder) { - stats.rewrittenToMatchNone++; return rewritten; } if (rewritten == next) { return this; } - return new Builder(rewritten, field, stats, source); + return new Builder(rewritten, field, source); } @Override @@ -214,526 +197,6 @@ protected boolean doEquals(Builder other) { protected int doHashCode() { return Objects.hash(next, field); } - - Stats stats() { - return stats; - } - } - - private static class LuceneQuery extends org.apache.lucene.search.Query { - final org.apache.lucene.search.Query next; - private final IndexFieldData fieldData; - // mutable object for collecting stats and warnings, not really part of the query - private final Stats stats; - private final Warnings warnings; - - LuceneQuery(org.apache.lucene.search.Query next, IndexFieldData fieldData, Stats stats, Warnings warnings) { - this.next = next; - this.fieldData = fieldData; - this.stats = stats; - this.warnings = warnings; - } - - @Override - public void visit(QueryVisitor visitor) { - if (visitor.acceptField(fieldData.getFieldName())) { - visitor.visitLeaf(next); - } - } - - @Override - public org.apache.lucene.search.Query rewrite(IndexReader reader) throws IOException { - org.apache.lucene.search.Query rewritten = next.rewrite(reader); - if (rewritten instanceof MatchNoDocsQuery) { - stats.rewrittenToMatchNone++; - return rewritten; - } - if (rewritten == next) { - return this; - } - return new LuceneQuery(rewritten, fieldData, stats, warnings); - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new SingleValueWeight(this, next.createWeight(searcher, scoreMode, boost), fieldData, warnings); - } - - @Override - public boolean equals(Object obj) { - if (obj == this) { - return true; - } - if (obj == null || obj.getClass() != getClass()) { - return false; - } - SingleValueQuery.LuceneQuery other = (SingleValueQuery.LuceneQuery) obj; - return next.equals(other.next) && fieldData.getFieldName().equals(other.fieldData.getFieldName()); - } - - @Override - public int hashCode() { - return Objects.hash(classHash(), next, fieldData.getFieldName()); - } - - @Override - public String toString(String field) { - StringBuilder builder = new StringBuilder("single_value("); - if (false == this.fieldData.getFieldName().equals(field)) { - builder.append(this.fieldData.getFieldName()); - builder.append(":"); - } - builder.append(next); - return builder.append(")").toString(); - } - } - - private static class SingleValueWeight extends Weight { - private final Stats stats; - private final Weight next; - private final IndexFieldData fieldData; - private final Warnings warnings; - - private SingleValueWeight(SingleValueQuery.LuceneQuery query, 
Weight next, IndexFieldData fieldData, Warnings warnings) { - super(query); - this.stats = query.stats; - this.next = next; - this.fieldData = fieldData; - this.warnings = warnings; - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - Explanation nextExplanation = next.explain(context, doc); - if (false == nextExplanation.isMatch()) { - return Explanation.noMatch("next didn't match", nextExplanation); - } - LeafFieldData lfd = fieldData.load(context); - SortedBinaryDocValues values = lfd.getBytesValues(); - if (false == values.advanceExact(doc)) { - return Explanation.noMatch("no values in field", nextExplanation); - } - if (values.docValueCount() != 1) { - return Explanation.noMatch("field has too many values [" + values.docValueCount() + "]", nextExplanation); - } - return Explanation.match(nextExplanation.getValue(), "field has exactly 1 value", nextExplanation); - } - - @Override - public Scorer scorer(LeafReaderContext context) throws IOException { - Scorer nextScorer = next.scorer(context); - if (nextScorer == null) { - stats.noNextScorer++; - return null; - } - LeafFieldData lfd = fieldData.load(context); - /* - * SortedBinaryDocValues are available for most fields, but they - * are made available by eagerly converting non-bytes values to - * utf-8 strings. The eager conversion is quite expensive. So - * we specialize on numeric fields and fields with ordinals to - * avoid that expense in at least that case. - * - * Also! Lucene's FieldExistsQuery only needs one scorer that can - * use all the docs values iterators at DocIdSetIterators. We - * can't do that because we need the check the number of fields. - */ - if (lfd instanceof LeafNumericFieldData n) { - return scorer(context, nextScorer, n); - } - if (lfd instanceof LeafOrdinalsFieldData o) { - return scorer(context, nextScorer, o); - } - return scorer(nextScorer, lfd); - } - - private Scorer scorer(LeafReaderContext context, Scorer nextScorer, LeafNumericFieldData lfd) throws IOException { - SortedNumericDocValues sortedNumerics = lfd.getLongValues(); - if (DocValues.unwrapSingleton(sortedNumerics) != null) { - /* - * Segment contains only single valued fields. But it's possible - * that some fields have 0 values. The most surefire way to check - * is to look at the index for the data. If there isn't an index - * this isn't going to work - but if there is we can compare the - * number of documents in the index to the number of values in it - - * if they are the same we've got a dense singleton. 
- */ - PointValues points = context.reader().getPointValues(fieldData.getFieldName()); - if (points != null && points.getDocCount() == context.reader().maxDoc()) { - stats.numericSingle++; - return nextScorer; - } - } - TwoPhaseIterator nextIterator = nextScorer.twoPhaseIterator(); - if (nextIterator == null) { - stats.numericMultiNoApprox++; - return new SingleValueQueryScorer( - this, - nextScorer, - new TwoPhaseIteratorForSortedNumericsAndSinglePhaseQueries(nextScorer.iterator(), sortedNumerics, warnings) - ); - } - stats.numericMultiApprox++; - return new SingleValueQueryScorer( - this, - nextScorer, - new TwoPhaseIteratorForSortedNumericsAndTwoPhaseQueries(nextIterator, sortedNumerics, warnings) - ); - } - - private Scorer scorer(LeafReaderContext context, Scorer nextScorer, LeafOrdinalsFieldData lfd) throws IOException { - SortedSetDocValues sortedSet = lfd.getOrdinalsValues(); - if (DocValues.unwrapSingleton(sortedSet) != null) { - /* - * Segment contains only single valued fields. But it's possible - * that some fields have 0 values. The most surefire way to check - * is to look at the index for the data. If there isn't an index - * this isn't going to work - but if there is we can compare the - * number of documents in the index to the number of values in it - - * if they are the same we've got a dense singleton. - */ - Terms terms = context.reader().terms(fieldData.getFieldName()); - if (terms != null && terms.getDocCount() == context.reader().maxDoc()) { - stats.ordinalsSingle++; - return nextScorer; - } - } - TwoPhaseIterator nextIterator = nextScorer.twoPhaseIterator(); - if (nextIterator == null) { - stats.ordinalsMultiNoApprox++; - return new SingleValueQueryScorer( - this, - nextScorer, - new TwoPhaseIteratorForSortedSetAndSinglePhaseQueries(nextScorer.iterator(), sortedSet, warnings) - ); - } - stats.ordinalsMultiApprox++; - return new SingleValueQueryScorer( - this, - nextScorer, - new TwoPhaseIteratorForSortedSetAndTwoPhaseQueries(nextIterator, sortedSet, warnings) - ); - } - - private Scorer scorer(Scorer nextScorer, LeafFieldData lfd) { - SortedBinaryDocValues sortedBinary = lfd.getBytesValues(); - TwoPhaseIterator nextIterator = nextScorer.twoPhaseIterator(); - if (nextIterator == null) { - stats.bytesNoApprox++; - return new SingleValueQueryScorer( - this, - nextScorer, - new TwoPhaseIteratorForSortedBinaryAndSinglePhaseQueries(nextScorer.iterator(), sortedBinary, warnings) - ); - } - stats.bytesApprox++; - return new SingleValueQueryScorer( - this, - nextScorer, - new TwoPhaseIteratorForSortedBinaryAndTwoPhaseQueries(nextIterator, sortedBinary, warnings) - ); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - // we cannot cache this query because we loose the ability of emitting warnings - return false; - } - } - - private static class SingleValueQueryScorer extends Scorer { - private final Scorer next; - private final TwoPhaseIterator iterator; - - private SingleValueQueryScorer(Weight weight, Scorer next, TwoPhaseIterator iterator) { - super(weight); - this.next = next; - this.iterator = iterator; - } - - @Override - public DocIdSetIterator iterator() { - return TwoPhaseIterator.asDocIdSetIterator(iterator); - } - - @Override - public TwoPhaseIterator twoPhaseIterator() { - return iterator; - } - - @Override - public float getMaxScore(int upTo) throws IOException { - return next.getMaxScore(upTo); - } - - @Override - public float score() throws IOException { - return next.score(); - } - - @Override - public int docID() { - return 
next.docID(); - } - } - - /** - * The estimated number of comparisons to check if a {@link SortedNumericDocValues} - * has more than one value. There isn't a good way to get that number out of - * {@link SortedNumericDocValues} so this is a guess. - */ - private static final int SORTED_NUMERIC_MATCH_COST = 10; - - private static class TwoPhaseIteratorForSortedNumericsAndSinglePhaseQueries extends TwoPhaseIterator { - private final SortedNumericDocValues sortedNumerics; - private final Warnings warnings; - - private TwoPhaseIteratorForSortedNumericsAndSinglePhaseQueries( - DocIdSetIterator approximation, - SortedNumericDocValues sortedNumerics, - Warnings warning - ) { - super(approximation); - this.sortedNumerics = sortedNumerics; - this.warnings = warning; - } - - @Override - public boolean matches() throws IOException { - if (false == sortedNumerics.advanceExact(approximation.docID())) { - return false; - } - if (sortedNumerics.docValueCount() != 1) { - warnings.registerException(new IllegalArgumentException(MULTI_VALUE_WARNING)); - return false; - } - return true; - } - - @Override - public float matchCost() { - return SORTED_NUMERIC_MATCH_COST; - } - } - - private static class TwoPhaseIteratorForSortedNumericsAndTwoPhaseQueries extends TwoPhaseIterator { - private final SortedNumericDocValues sortedNumerics; - private final TwoPhaseIterator next; - private final Warnings warnings; - - private TwoPhaseIteratorForSortedNumericsAndTwoPhaseQueries( - TwoPhaseIterator next, - SortedNumericDocValues sortedNumerics, - Warnings warnings - ) { - super(next.approximation()); - this.sortedNumerics = sortedNumerics; - this.next = next; - this.warnings = warnings; - } - - @Override - public boolean matches() throws IOException { - if (false == sortedNumerics.advanceExact(approximation.docID())) { - return false; - } - if (sortedNumerics.docValueCount() != 1) { - warnings.registerException(new IllegalArgumentException(MULTI_VALUE_WARNING)); - return false; - } - return next.matches(); - } - - @Override - public float matchCost() { - return SORTED_NUMERIC_MATCH_COST + next.matchCost(); - } - } - - private static class TwoPhaseIteratorForSortedBinaryAndSinglePhaseQueries extends TwoPhaseIterator { - private final SortedBinaryDocValues sortedBinary; - private final Warnings warnings; - - private TwoPhaseIteratorForSortedBinaryAndSinglePhaseQueries( - DocIdSetIterator approximation, - SortedBinaryDocValues sortedBinary, - Warnings warnings - ) { - super(approximation); - this.sortedBinary = sortedBinary; - this.warnings = warnings; - } - - @Override - public boolean matches() throws IOException { - if (false == sortedBinary.advanceExact(approximation.docID())) { - return false; - } - if (sortedBinary.docValueCount() != 1) { - warnings.registerException(new IllegalArgumentException(MULTI_VALUE_WARNING)); - return false; - } - return true; - } - - @Override - public float matchCost() { - return SORTED_NUMERIC_MATCH_COST; - } - } - - private static class TwoPhaseIteratorForSortedSetAndTwoPhaseQueries extends TwoPhaseIterator { - private final SortedSetDocValues sortedSet; - private final TwoPhaseIterator next; - private final Warnings warnings; - - private TwoPhaseIteratorForSortedSetAndTwoPhaseQueries(TwoPhaseIterator next, SortedSetDocValues sortedSet, Warnings warnings) { - super(next.approximation()); - this.sortedSet = sortedSet; - this.next = next; - this.warnings = warnings; - } - - @Override - public boolean matches() throws IOException { - if (false == 
sortedSet.advanceExact(approximation.docID())) { - return false; - } - if (sortedSet.docValueCount() != 1) { - warnings.registerException(new IllegalArgumentException(MULTI_VALUE_WARNING)); - return false; - } - return next.matches(); - } - - @Override - public float matchCost() { - return SORTED_NUMERIC_MATCH_COST + next.matchCost(); - } } - private static class TwoPhaseIteratorForSortedSetAndSinglePhaseQueries extends TwoPhaseIterator { - private final SortedSetDocValues sortedSet; - private final Warnings warnings; - - private TwoPhaseIteratorForSortedSetAndSinglePhaseQueries( - DocIdSetIterator approximation, - SortedSetDocValues sortedSet, - Warnings warnings - ) { - super(approximation); - this.sortedSet = sortedSet; - this.warnings = warnings; - } - - @Override - public boolean matches() throws IOException { - if (false == sortedSet.advanceExact(approximation.docID())) { - return false; - } - if (sortedSet.docValueCount() != 1) { - warnings.registerException(new IllegalArgumentException(MULTI_VALUE_WARNING)); - return false; - } - return true; - } - - @Override - public float matchCost() { - return SORTED_NUMERIC_MATCH_COST; - } - } - - private static class TwoPhaseIteratorForSortedBinaryAndTwoPhaseQueries extends TwoPhaseIterator { - private final SortedBinaryDocValues sortedBinary; - private final TwoPhaseIterator next; - private final Warnings warnings; - - private TwoPhaseIteratorForSortedBinaryAndTwoPhaseQueries( - TwoPhaseIterator next, - SortedBinaryDocValues sortedBinary, - Warnings warnings - ) { - super(next.approximation()); - this.sortedBinary = sortedBinary; - this.next = next; - this.warnings = warnings; - } - - @Override - public boolean matches() throws IOException { - if (false == sortedBinary.advanceExact(approximation.docID())) { - return false; - } - if (sortedBinary.docValueCount() != 1) { - warnings.registerException(new IllegalArgumentException(MULTI_VALUE_WARNING)); - return false; - } - return next.matches(); - } - - @Override - public float matchCost() { - return SORTED_NUMERIC_MATCH_COST + next.matchCost(); - } - } - - static class Stats { - // TODO expose stats somehow - private int missingField; - private int rewrittenToMatchNone; - private int noNextScorer; - private int numericSingle; - private int numericMultiNoApprox; - private int numericMultiApprox; - private int ordinalsSingle; - private int ordinalsMultiNoApprox; - private int ordinalsMultiApprox; - private int bytesNoApprox; - private int bytesApprox; - - int missingField() { - return missingField; - } - - int rewrittenToMatchNone() { - return rewrittenToMatchNone; - } - - int noNextScorer() { - return noNextScorer; - } - - int numericSingle() { - return numericSingle; - } - - int numericMultiNoApprox() { - return numericMultiNoApprox; - } - - int numericMultiApprox() { - return numericMultiApprox; - } - - int ordinalsSingle() { - return ordinalsSingle; - } - - int ordinalsMultiNoApprox() { - return ordinalsMultiNoApprox; - } - - int ordinalsMultiApprox() { - return ordinalsMultiApprox; - } - - int bytesNoApprox() { - return bytesNoApprox; - } - - int bytesApprox() { - return bytesApprox; - } - } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMathQueryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMathQueryTests.java new file mode 100644 index 0000000000000..f49dfe67e591a --- /dev/null +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMathQueryTests.java @@ -0,0 +1,203 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.querydsl.query; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.document.DoubleField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.KeywordField; +import org.apache.lucene.document.LongField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperServiceTestCase; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.Warnings; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.sameInstance; + +public class SingleValueMathQueryTests extends MapperServiceTestCase { + interface Setup { + XContentBuilder mapping(XContentBuilder builder) throws IOException; + + List> build(RandomIndexWriter iw) throws IOException; + + void assertRewrite(IndexSearcher indexSearcher, Query query) throws IOException; + } + + @ParametersFactory + public static List params() { + List params = new ArrayList<>(); + for (String fieldType : new String[] { "long", "integer", "short", "byte", "double", "float", "keyword" }) { + for (boolean multivaluedField : new boolean[] { true, false }) { + for (boolean allowEmpty : new boolean[] { true, false }) { + params.add(new Object[] { new StandardSetup(fieldType, multivaluedField, allowEmpty, 100) }); + } + } + } + return params; + } + + private final Setup setup; + + public SingleValueMathQueryTests(Setup setup) { + this.setup = setup; + } + + public void testQuery() throws IOException { + MapperService mapper = createMapperService(mapping(setup::mapping)); + try (Directory d = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), d)) { + List> fieldValues = setup.build(iw); + try (IndexReader reader = iw.getReader()) { + SearchExecutionContext ctx = createSearchExecutionContext(mapper, new IndexSearcher(reader)); + Query query = new SingleValueMatchQuery( + ctx.getForField(mapper.fieldType("foo"), MappedFieldType.FielddataOperation.SEARCH), + new Warnings(Source.EMPTY) + ); + runCase(fieldValues, ctx.searcher().count(query)); + setup.assertRewrite(ctx.searcher(), query); + } + } + } + + public void testEmpty() throws IOException { + MapperService mapper = createMapperService(mapping(setup::mapping)); + try (Directory d = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), d)) { + try (IndexReader reader = iw.getReader()) { + 
SearchExecutionContext ctx = createSearchExecutionContext(mapper, new IndexSearcher(reader)); + Query query = new SingleValueMatchQuery( + ctx.getForField(mapper.fieldType("foo"), MappedFieldType.FielddataOperation.SEARCH), + new Warnings(Source.EMPTY) + ); + runCase(List.of(), ctx.searcher().count(query)); + } + } + } + + private void runCase(List> fieldValues, int count) { + int expected = 0; + int mvCountInRange = 0; + for (int i = 0; i < fieldValues.size(); i++) { + int valuesCount = fieldValues.get(i).size(); + if (valuesCount == 1) { + expected++; + } else if (valuesCount > 1) { + mvCountInRange++; + } + } + assertThat(count, equalTo(expected)); + // the SingleValueQuery.TwoPhaseIteratorForSortedNumericsAndTwoPhaseQueries can scan all docs - and generate warnings - even if + // inner query matches none, so warn if MVs have been encountered within given range, OR if a full scan is required + if (mvCountInRange > 0) { + assertWarnings( + "Line -1:-1: evaluation of [] failed, treating result as null. Only first 20 failures recorded.", + "Line -1:-1: java.lang.IllegalArgumentException: single-value function encountered multi-value" + ); + } + } + + private record StandardSetup(String fieldType, boolean multivaluedField, boolean empty, int count) implements Setup { + @Override + public XContentBuilder mapping(XContentBuilder builder) throws IOException { + return builder.startObject("foo").field("type", fieldType).endObject(); + } + + @Override + public List> build(RandomIndexWriter iw) throws IOException { + List> fieldValues = new ArrayList<>(100); + for (int i = 0; i < count; i++) { + List values = values(i); + fieldValues.add(values); + iw.addDocument(docFor(values)); + } + return fieldValues; + } + + @Override + public void assertRewrite(IndexSearcher indexSearcher, Query query) throws IOException { + if (empty == false && multivaluedField == false) { + assertThat(query.rewrite(indexSearcher), instanceOf(MatchAllDocsQuery.class)); + } else { + assertThat(query.rewrite(indexSearcher), sameInstance(query)); + } + } + + private List values(int i) { + // i == 10 forces at least one multivalued field when we're configured for multivalued fields + boolean makeMultivalued = multivaluedField && (i == 10 || randomBoolean()); + if (makeMultivalued) { + int count = between(2, 10); + Set set = new HashSet<>(count); + while (set.size() < count) { + set.add(randomValue()); + } + return List.copyOf(set); + } + // i == 0 forces at least one empty field when we're configured for empty fields + if (empty && (i == 0 || randomBoolean())) { + return List.of(); + } + return List.of(randomValue()); + } + + private Object randomValue() { + return switch (fieldType) { + case "long" -> randomLong(); + case "integer" -> randomInt(); + case "short" -> randomShort(); + case "byte" -> randomByte(); + case "double" -> randomDouble(); + case "float" -> randomFloat(); + case "keyword" -> randomAlphaOfLength(5); + default -> throw new UnsupportedOperationException(); + }; + } + + private List docFor(Iterable values) { + List fields = new ArrayList<>(); + switch (fieldType) { + case "long", "integer", "short", "byte" -> { + for (Object v : values) { + long l = ((Number) v).longValue(); + fields.add(new LongField("foo", l, Field.Store.NO)); + } + } + case "double", "float" -> { + for (Object v : values) { + double d = ((Number) v).doubleValue(); + fields.add(new DoubleField("foo", d, Field.Store.NO)); + } + } + case "keyword" -> { + for (Object v : values) { + fields.add(new KeywordField("foo", v.toString(), 
Field.Store.NO)); + } + } + default -> throw new UnsupportedOperationException(); + } + return fields; + } + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuerySerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuerySerializationTests.java index 34c66675fccdd..a3bf34ad38b8e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuerySerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuerySerializationTests.java @@ -19,7 +19,7 @@ public class SingleValueQuerySerializationTests extends AbstractWireSerializingTestCase { @Override protected SingleValueQuery.Builder createTestInstance() { - return new SingleValueQuery.Builder(randomQuery(), randomFieldName(), new SingleValueQuery.Stats(), Source.EMPTY); + return new SingleValueQuery.Builder(randomQuery(), randomFieldName(), Source.EMPTY); } private QueryBuilder randomQuery() { @@ -36,13 +36,11 @@ protected SingleValueQuery.Builder mutateInstance(SingleValueQuery.Builder insta case 0 -> new SingleValueQuery.Builder( randomValueOtherThan(instance.next(), this::randomQuery), instance.field(), - new SingleValueQuery.Stats(), Source.EMPTY ); case 1 -> new SingleValueQuery.Builder( instance.next(), randomValueOtherThan(instance.field(), this::randomFieldName), - new SingleValueQuery.Stats(), Source.EMPTY ); default -> throw new IllegalArgumentException(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java index f26e819685789..5d89ef3350193 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java @@ -40,17 +40,12 @@ import java.util.Set; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; -import static org.hamcrest.Matchers.instanceOf; public class SingleValueQueryTests extends MapperServiceTestCase { interface Setup { XContentBuilder mapping(XContentBuilder builder) throws IOException; List> build(RandomIndexWriter iw) throws IOException; - - void assertStats(SingleValueQuery.Builder builder, YesNoSometimes subHasTwoPhase); } @ParametersFactory @@ -74,47 +69,31 @@ public SingleValueQueryTests(Setup setup) { } public void testMatchAll() throws IOException { - testCase(new SingleValueQuery(new MatchAll(Source.EMPTY), "foo").asBuilder(), YesNoSometimes.NO, YesNoSometimes.NO, this::runCase); + testCase(new SingleValueQuery(new MatchAll(Source.EMPTY), "foo").asBuilder(), this::runCase); } public void testMatchSome() throws IOException { int max = between(1, 100); testCase( - new SingleValueQuery.Builder(new RangeQueryBuilder("i").lt(max), "foo", new SingleValueQuery.Stats(), Source.EMPTY), - YesNoSometimes.SOMETIMES, - YesNoSometimes.NO, + new SingleValueQuery.Builder(new RangeQueryBuilder("i").lt(max), "foo", Source.EMPTY), (fieldValues, count) -> runCase(fieldValues, count, null, max, false) ); } public void testSubPhrase() throws IOException { - testCase( - new SingleValueQuery.Builder( - new MatchPhraseQueryBuilder("str", "fox jumped"), - "foo", - new SingleValueQuery.Stats(), - 
Source.EMPTY - ), - YesNoSometimes.NO, - YesNoSometimes.YES, - this::runCase - ); + testCase(new SingleValueQuery.Builder(new MatchPhraseQueryBuilder("str", "fox jumped"), "foo", Source.EMPTY), this::runCase); } public void testMatchNone() throws IOException { testCase( - new SingleValueQuery.Builder(new MatchNoneQueryBuilder(), "foo", new SingleValueQuery.Stats(), Source.EMPTY), - YesNoSometimes.YES, - YesNoSometimes.NO, + new SingleValueQuery.Builder(new MatchNoneQueryBuilder(), "foo", Source.EMPTY), (fieldValues, count) -> assertThat(count, equalTo(0)) ); } public void testRewritesToMatchNone() throws IOException { testCase( - new SingleValueQuery.Builder(new TermQueryBuilder("missing", 0), "foo", new SingleValueQuery.Stats(), Source.EMPTY), - YesNoSometimes.YES, - YesNoSometimes.NO, + new SingleValueQuery.Builder(new TermQueryBuilder("missing", 0), "foo", Source.EMPTY), (fieldValues, count) -> assertThat(count, equalTo(0)) ); } @@ -122,8 +101,6 @@ public void testRewritesToMatchNone() throws IOException { public void testNotMatchAll() throws IOException { testCase( new SingleValueQuery(new MatchAll(Source.EMPTY), "foo").negate(Source.EMPTY).asBuilder(), - YesNoSometimes.YES, - YesNoSometimes.NO, (fieldValues, count) -> assertThat(count, equalTo(0)) ); } @@ -131,8 +108,6 @@ public void testNotMatchAll() throws IOException { public void testNotMatchNone() throws IOException { testCase( new SingleValueQuery(new MatchAll(Source.EMPTY).negate(Source.EMPTY), "foo").negate(Source.EMPTY).asBuilder(), - YesNoSometimes.NO, - YesNoSometimes.NO, this::runCase ); } @@ -141,8 +116,6 @@ public void testNotMatchSome() throws IOException { int max = between(1, 100); testCase( new SingleValueQuery(new RangeQuery(Source.EMPTY, "i", null, false, max, false, null), "foo").negate(Source.EMPTY).asBuilder(), - YesNoSometimes.SOMETIMES, - YesNoSometimes.SOMETIMES, (fieldValues, count) -> runCase(fieldValues, count, max, 100, true) ); } @@ -191,18 +164,7 @@ private void runCase(List> fieldValues, int count) { runCase(fieldValues, count, null, null, false); } - enum YesNoSometimes { - YES, - NO, - SOMETIMES; - } - - private void testCase( - SingleValueQuery.Builder builder, - YesNoSometimes rewritesToMatchNone, - YesNoSometimes subHasTwoPhase, - TestCase testCase - ) throws IOException { + private void testCase(SingleValueQuery.Builder builder, TestCase testCase) throws IOException { MapperService mapper = createMapperService(mapping(setup::mapping)); try (Directory d = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), d)) { List> fieldValues = setup.build(iw); @@ -211,25 +173,6 @@ private void testCase( QueryBuilder rewritten = builder.rewrite(ctx); Query query = rewritten.toQuery(ctx); testCase.run(fieldValues, ctx.searcher().count(query)); - if (rewritesToMatchNone == YesNoSometimes.YES) { - assertThat(rewritten, instanceOf(MatchNoneQueryBuilder.class)); - assertThat(builder.stats().missingField(), equalTo(0)); - assertThat(builder.stats().rewrittenToMatchNone(), equalTo(1)); - assertThat(builder.stats().numericSingle(), equalTo(0)); - assertThat(builder.stats().numericMultiNoApprox(), equalTo(0)); - assertThat(builder.stats().numericMultiApprox(), equalTo(0)); - assertThat(builder.stats().ordinalsSingle(), equalTo(0)); - assertThat(builder.stats().ordinalsMultiNoApprox(), equalTo(0)); - assertThat(builder.stats().ordinalsMultiApprox(), equalTo(0)); - assertThat(builder.stats().bytesApprox(), equalTo(0)); - assertThat(builder.stats().bytesNoApprox(), equalTo(0)); - } else { - 
assertThat(builder.stats().rewrittenToMatchNone(), equalTo(0)); - setup.assertStats(builder, subHasTwoPhase); - } - if (rewritesToMatchNone != YesNoSometimes.SOMETIMES) { - assertThat(builder.stats().noNextScorer(), equalTo(0)); - } assertEqualsAndHashcodeStable(query, rewritten.toQuery(ctx)); } } @@ -316,73 +259,6 @@ private List docFor(int i, Iterable values) { } return fields; } - - @Override - public void assertStats(SingleValueQuery.Builder builder, YesNoSometimes subHasTwoPhase) { - assertThat(builder.stats().missingField(), equalTo(0)); - switch (fieldType) { - case "long", "integer", "short", "byte", "double", "float" -> { - assertThat(builder.stats().ordinalsSingle(), equalTo(0)); - assertThat(builder.stats().ordinalsMultiNoApprox(), equalTo(0)); - assertThat(builder.stats().ordinalsMultiApprox(), equalTo(0)); - assertThat(builder.stats().bytesApprox(), equalTo(0)); - assertThat(builder.stats().bytesNoApprox(), equalTo(0)); - - if (multivaluedField || empty) { - assertThat(builder.stats().numericSingle(), greaterThanOrEqualTo(0)); - switch (subHasTwoPhase) { - case YES -> { - assertThat(builder.stats().numericMultiNoApprox(), equalTo(0)); - assertThat(builder.stats().numericMultiApprox(), greaterThan(0)); - } - case NO -> { - assertThat(builder.stats().numericMultiNoApprox(), greaterThan(0)); - assertThat(builder.stats().numericMultiApprox(), equalTo(0)); - } - case SOMETIMES -> { - assertThat(builder.stats().numericMultiNoApprox() + builder.stats().numericMultiApprox(), greaterThan(0)); - assertThat(builder.stats().numericMultiNoApprox(), greaterThanOrEqualTo(0)); - assertThat(builder.stats().numericMultiApprox(), greaterThanOrEqualTo(0)); - } - } - } else { - assertThat(builder.stats().numericSingle(), greaterThan(0)); - assertThat(builder.stats().numericMultiNoApprox(), equalTo(0)); - assertThat(builder.stats().numericMultiApprox(), equalTo(0)); - } - } - case "keyword" -> { - assertThat(builder.stats().numericSingle(), equalTo(0)); - assertThat(builder.stats().numericMultiNoApprox(), equalTo(0)); - assertThat(builder.stats().numericMultiApprox(), equalTo(0)); - assertThat(builder.stats().bytesApprox(), equalTo(0)); - assertThat(builder.stats().bytesNoApprox(), equalTo(0)); - if (multivaluedField || empty) { - assertThat(builder.stats().ordinalsSingle(), greaterThanOrEqualTo(0)); - switch (subHasTwoPhase) { - case YES -> { - assertThat(builder.stats().ordinalsMultiNoApprox(), equalTo(0)); - assertThat(builder.stats().ordinalsMultiApprox(), greaterThan(0)); - } - case NO -> { - assertThat(builder.stats().ordinalsMultiNoApprox(), greaterThan(0)); - assertThat(builder.stats().ordinalsMultiApprox(), equalTo(0)); - } - case SOMETIMES -> { - assertThat(builder.stats().ordinalsMultiNoApprox() + builder.stats().ordinalsMultiApprox(), greaterThan(0)); - assertThat(builder.stats().ordinalsMultiNoApprox(), greaterThanOrEqualTo(0)); - assertThat(builder.stats().ordinalsMultiApprox(), greaterThanOrEqualTo(0)); - } - } - } else { - assertThat(builder.stats().ordinalsSingle(), greaterThan(0)); - assertThat(builder.stats().ordinalsMultiNoApprox(), equalTo(0)); - assertThat(builder.stats().ordinalsMultiApprox(), equalTo(0)); - } - } - default -> throw new UnsupportedOperationException(); - } - } } private record FieldMissingSetup() implements Setup { @@ -403,18 +279,5 @@ public List> build(RandomIndexWriter iw) throws IOException { } return fieldValues; } - - @Override - public void assertStats(SingleValueQuery.Builder builder, YesNoSometimes subHasTwoPhase) { - 
assertThat(builder.stats().missingField(), equalTo(1)); - assertThat(builder.stats().numericSingle(), equalTo(0)); - assertThat(builder.stats().numericMultiNoApprox(), equalTo(0)); - assertThat(builder.stats().numericMultiApprox(), equalTo(0)); - assertThat(builder.stats().ordinalsSingle(), equalTo(0)); - assertThat(builder.stats().ordinalsMultiNoApprox(), equalTo(0)); - assertThat(builder.stats().ordinalsMultiApprox(), equalTo(0)); - assertThat(builder.stats().bytesApprox(), equalTo(0)); - assertThat(builder.stats().bytesNoApprox(), equalTo(0)); - } } } From e9beb5407fc4482cf90ffc26856e7da007457caf Mon Sep 17 00:00:00 2001 From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com> Date: Wed, 17 Jul 2024 14:18:25 +0200 Subject: [PATCH 16/65] Disable tests when AdaptiveAllocationsFeatureFlag is disabled (#110969) --- .../xpack/inference/services/ServiceUtilsTests.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ServiceUtilsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ServiceUtilsTests.java index 86af5e431d78d..76f095236af8a 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ServiceUtilsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ServiceUtilsTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingByteResults; import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; +import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsFeatureFlag; import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; import org.elasticsearch.xpack.inference.results.InferenceTextEmbeddingByteResultsTests; import org.elasticsearch.xpack.inference.results.TextEmbeddingResultsTests; @@ -289,6 +290,8 @@ public void testRemoveAsOneOfTypesMissingReturnsNull() { } public void testRemoveAsAdaptiveAllocationsSettings() { + assumeTrue("Should only run if adaptive allocations feature flag is enabled", AdaptiveAllocationsFeatureFlag.isEnabled()); + Map map = new HashMap<>( Map.of("settings", new HashMap<>(Map.of("enabled", true, "min_number_of_allocations", 7, "max_number_of_allocations", 42))) ); @@ -311,6 +314,8 @@ public void testRemoveAsAdaptiveAllocationsSettings() { } public void testRemoveAsAdaptiveAllocationsSettings_exceptions() { + assumeTrue("Should only run if adaptive allocations feature flag is enabled", AdaptiveAllocationsFeatureFlag.isEnabled()); + Map map = new HashMap<>( Map.of("settings", new HashMap<>(Map.of("enabled", "YES!", "blah", 42, "max_number_of_allocations", -7))) ); From b5c39b700532a08ce9d23706c88438a215992082 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 17 Jul 2024 15:13:12 +0200 Subject: [PATCH 17/65] Optimize transport_thread exectures search logic (#110965) We can save a bunch of allocations and cycles in a few spots, the blocks checks in particular are very heavy if done in a loop over many indices. Also, presizing some collections, avoiding temporary collections where not necessary etc. helps a bit as well. None of this is very relevant for searches over few indices but all of it becomes quite visible in profiling once you target a larger number of indices/shards. 
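The recurring pattern is a cheap emptiness check before the per-index loop. A simplified sketch of the change to checkIndexBlocks, for illustration:

    var blocks = clusterState.blocks();
    if (blocks.global().isEmpty() && blocks.indices().isEmpty()) {
        // no blocks at all, skip the comparatively expensive per-index checks
        return;
    }
    blocks.globalBlockedRaiseException(ClusterBlockLevel.READ);
    for (String index : concreteIndices) {
        blocks.indexBlockedRaiseException(ClusterBlockLevel.READ, index);
    }

The same short-circuit is applied in TransportSearchAction when building the per-index OriginalIndices map and when checking for read-only indices before pre-filtering.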
--- .../TransportFieldCapabilitiesAction.java | 9 +- .../action/search/TransportSearchAction.java | 89 ++++++++++++------- .../search/sort/FieldSortBuilder.java | 6 +- 3 files changed, 69 insertions(+), 35 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java b/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java index 57f7081838c43..41bf42b4e4e9c 100644 --- a/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java +++ b/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java @@ -279,9 +279,14 @@ private void doExecuteForked( } private static void checkIndexBlocks(ClusterState clusterState, String[] concreteIndices) { - clusterState.blocks().globalBlockedRaiseException(ClusterBlockLevel.READ); + var blocks = clusterState.blocks(); + if (blocks.global().isEmpty() && blocks.indices().isEmpty()) { + // short circuit optimization because block check below is relatively expensive for many indices + return; + } + blocks.globalBlockedRaiseException(ClusterBlockLevel.READ); for (String index : concreteIndices) { - clusterState.blocks().indexBlockedRaiseException(ClusterBlockLevel.READ, index); + blocks.indexBlockedRaiseException(ClusterBlockLevel.READ, index); } } diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index 6d2b67a1e0f55..84d233ec9710a 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -48,6 +48,7 @@ import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; +import org.elasticsearch.common.util.ArrayUtils; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.common.util.Maps; @@ -100,10 +101,8 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; import java.util.function.BiFunction; -import java.util.function.BooleanSupplier; import java.util.function.Function; import java.util.function.LongSupplier; -import java.util.stream.StreamSupport; import static org.elasticsearch.action.search.SearchType.DFS_QUERY_THEN_FETCH; import static org.elasticsearch.action.search.SearchType.QUERY_THEN_FETCH; @@ -198,9 +197,14 @@ private Map buildPerIndexOriginalIndices( String[] indices, IndicesOptions indicesOptions ) { - Map res = new HashMap<>(); + Map res = Maps.newMapWithExpectedSize(indices.length); + var blocks = clusterState.blocks(); + // optimization: mostly we do not have any blocks so there's no point in the expensive per-index checking + boolean hasBlocks = blocks.global().isEmpty() == false || blocks.indices().isEmpty() == false; for (String index : indices) { - clusterState.blocks().indexBlockedRaiseException(ClusterBlockLevel.READ, index); + if (hasBlocks) { + blocks.indexBlockedRaiseException(ClusterBlockLevel.READ, index); + } String[] aliases = indexNameExpressionResolver.indexAliases( clusterState, @@ -210,23 +214,27 @@ private Map buildPerIndexOriginalIndices( true, indicesAndAliases ); - BooleanSupplier hasDataStreamRef = () -> { - IndexAbstraction ret = clusterState.getMetadata().getIndicesLookup().get(index); - if (ret == null 
|| ret.getParentDataStream() == null) { - return false; - } - return indicesAndAliases.contains(ret.getParentDataStream().getName()); - }; - List finalIndices = new ArrayList<>(); - if (aliases == null || aliases.length == 0 || indicesAndAliases.contains(index) || hasDataStreamRef.getAsBoolean()) { - finalIndices.add(index); + String[] finalIndices = Strings.EMPTY_ARRAY; + if (aliases == null + || aliases.length == 0 + || indicesAndAliases.contains(index) + || hasDataStreamRef(clusterState, indicesAndAliases, index)) { + finalIndices = new String[] { index }; } if (aliases != null) { - finalIndices.addAll(Arrays.asList(aliases)); + finalIndices = finalIndices.length == 0 ? aliases : ArrayUtils.concat(finalIndices, aliases); } - res.put(index, new OriginalIndices(finalIndices.toArray(String[]::new), indicesOptions)); + res.put(index, new OriginalIndices(finalIndices, indicesOptions)); + } + return res; + } + + private static boolean hasDataStreamRef(ClusterState clusterState, Set indicesAndAliases, String index) { + IndexAbstraction ret = clusterState.getMetadata().getIndicesLookup().get(index); + if (ret == null || ret.getParentDataStream() == null) { + return false; } - return Collections.unmodifiableMap(res); + return indicesAndAliases.contains(ret.getParentDataStream().getName()); } Map buildIndexAliasFilters(ClusterState clusterState, Set indicesAndAliases, Index[] concreteIndices) { @@ -1257,21 +1265,30 @@ static boolean shouldPreFilterSearchShards( int numShards, int defaultPreFilterShardSize ) { + if (searchRequest.searchType() != QUERY_THEN_FETCH) { + // we can't do this for DFS it needs to fan out to all shards all the time + return false; + } SearchSourceBuilder source = searchRequest.source(); Integer preFilterShardSize = searchRequest.getPreFilterShardSize(); - if (preFilterShardSize == null && (hasReadOnlyIndices(indices, clusterState) || hasPrimaryFieldSort(source))) { - preFilterShardSize = 1; - } else if (preFilterShardSize == null) { - preFilterShardSize = defaultPreFilterShardSize; + if (preFilterShardSize == null) { + if (hasReadOnlyIndices(indices, clusterState) || hasPrimaryFieldSort(source)) { + preFilterShardSize = 1; + } else { + preFilterShardSize = defaultPreFilterShardSize; + } } - return searchRequest.searchType() == QUERY_THEN_FETCH // we can't do this for DFS it needs to fan out to all shards all the time - && (SearchService.canRewriteToMatchNone(source) || hasPrimaryFieldSort(source)) - && preFilterShardSize < numShards; + return preFilterShardSize < numShards && (SearchService.canRewriteToMatchNone(source) || hasPrimaryFieldSort(source)); } private static boolean hasReadOnlyIndices(String[] indices, ClusterState clusterState) { + var blocks = clusterState.blocks(); + if (blocks.global().isEmpty() && blocks.indices().isEmpty()) { + // short circuit optimization because block check below is relatively expensive for many indices + return false; + } for (String index : indices) { - ClusterBlockException writeBlock = clusterState.blocks().indexBlockedException(ClusterBlockLevel.WRITE, index); + ClusterBlockException writeBlock = blocks.indexBlockedException(ClusterBlockLevel.WRITE, index); if (writeBlock != null) { return true; } @@ -1279,12 +1296,17 @@ private static boolean hasReadOnlyIndices(String[] indices, ClusterState cluster return false; } + // package private for testing static GroupShardsIterator mergeShardsIterators( List localShardIterators, List remoteShardIterators ) { - List shards = new ArrayList<>(remoteShardIterators); - 
shards.addAll(localShardIterators); + final List shards; + if (remoteShardIterators.isEmpty()) { + shards = localShardIterators; + } else { + shards = CollectionUtils.concatLists(remoteShardIterators, localShardIterators); + } return GroupShardsIterator.sortAndCreate(shards); } @@ -1717,10 +1739,15 @@ List getLocalShardsIterator( concreteIndices, searchRequest.indicesOptions() ); - return StreamSupport.stream(shardRoutings.spliterator(), false).map(it -> { - OriginalIndices finalIndices = originalIndices.get(it.shardId().getIndex().getName()); + SearchShardIterator[] list = new SearchShardIterator[shardRoutings.size()]; + int i = 0; + for (ShardIterator shardRouting : shardRoutings) { + final ShardId shardId = shardRouting.shardId(); + OriginalIndices finalIndices = originalIndices.get(shardId.getIndex().getName()); assert finalIndices != null; - return new SearchShardIterator(clusterAlias, it.shardId(), it.getShardRoutings(), finalIndices); - }).toList(); + list[i++] = new SearchShardIterator(clusterAlias, shardId, shardRouting.getShardRoutings(), finalIndices); + } + // the returned list must support in-place sorting, so this is the most memory efficient we can do here + return Arrays.asList(list); } } diff --git a/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java index b0a3a558e2956..91aa33b24d883 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java @@ -50,6 +50,7 @@ import java.io.IOException; import java.util.Collections; +import java.util.List; import java.util.Locale; import java.util.Objects; import java.util.function.Function; @@ -543,10 +544,11 @@ public static boolean hasPrimaryFieldSort(SearchSourceBuilder source) { * is an instance of this class, null otherwise. */ public static FieldSortBuilder getPrimaryFieldSortOrNull(SearchSourceBuilder source) { - if (source == null || source.sorts() == null || source.sorts().isEmpty()) { + final List> sorts; + if (source == null || (sorts = source.sorts()) == null || sorts.isEmpty()) { return null; } - return source.sorts().get(0) instanceof FieldSortBuilder ? (FieldSortBuilder) source.sorts().get(0) : null; + return sorts.get(0) instanceof FieldSortBuilder fieldSortBuilder ? fieldSortBuilder : null; } /** From e1ab21f36b01605b2556dfd84e24026c45853084 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 17 Jul 2024 09:19:45 -0400 Subject: [PATCH 18/65] Add in format nanos function (#110934) Adds a helper function to format a single long representing nanoseconds since epoch. I found myself writing this in a few places while working on https://github.com/elastic/elasticsearch/issues/109987 and decided it would be a good helper method. --- .../common/time/DateFormatter.java | 8 ++++++++ .../common/time/DateFormattersTests.java | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/common/time/DateFormatter.java b/server/src/main/java/org/elasticsearch/common/time/DateFormatter.java index 41f44dfbdedbc..45550c13174ce 100644 --- a/server/src/main/java/org/elasticsearch/common/time/DateFormatter.java +++ b/server/src/main/java/org/elasticsearch/common/time/DateFormatter.java @@ -71,6 +71,14 @@ default String formatMillis(long millis) { return format(Instant.ofEpochMilli(millis).atZone(zone)); } + /** + * Return the given nanoseconds-since-epoch formatted with this format. 
+ */ + default String formatNanos(long nanos) { + ZoneId zone = zone() != null ? zone() : ZoneOffset.UTC; + return format(Instant.ofEpochMilli(nanos / 1_000_000).plusNanos(nanos % 1_000_000).atZone(zone)); + } + /** * A name based format for this formatter. Can be one of the registered formatters like epoch_millis or * a configured format like HH:mm:ss diff --git a/server/src/test/java/org/elasticsearch/common/time/DateFormattersTests.java b/server/src/test/java/org/elasticsearch/common/time/DateFormattersTests.java index e10cca58f8b78..a9b7cb74e548e 100644 --- a/server/src/test/java/org/elasticsearch/common/time/DateFormattersTests.java +++ b/server/src/test/java/org/elasticsearch/common/time/DateFormattersTests.java @@ -695,6 +695,23 @@ public void testMinMillis() { assertThat(javaFormatted, equalTo("-292275055-05-16T16:47:04.192Z")); } + public void testMinNanos() { + String javaFormatted = DateFormatter.forPattern("strict_date_optional_time").formatNanos(Long.MIN_VALUE); + assertThat(javaFormatted, equalTo("1677-09-21T00:12:43.145Z")); + + // Note - since this is a negative value, the nanoseconds are being subtracted, which is why we get this value. + javaFormatted = DateFormatter.forPattern("strict_date_optional_time_nanos").formatNanos(Long.MIN_VALUE); + assertThat(javaFormatted, equalTo("1677-09-21T00:12:43.145224192Z")); + } + + public void testMaxNanos() { + String javaFormatted = DateFormatter.forPattern("strict_date_optional_time").formatNanos(Long.MAX_VALUE); + assertThat(javaFormatted, equalTo("2262-04-11T23:47:16.854Z")); + + javaFormatted = DateFormatter.forPattern("strict_date_optional_time_nanos").formatNanos(Long.MAX_VALUE); + assertThat(javaFormatted, equalTo("2262-04-11T23:47:16.854775807Z")); + } + public void testYearParsing() { // this one is considered a year assertParses("1234", "strict_date_optional_time||epoch_millis"); From 272f63557cb4b4df946b794cd43fc7c8704340c0 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 17 Jul 2024 23:47:02 +1000 Subject: [PATCH 19/65] [Test] Test for EOFException when skipping beyond available data (#110953) See title --- .../AbstractThirdPartyRepositoryTestCase.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/AbstractThirdPartyRepositoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/repositories/AbstractThirdPartyRepositoryTestCase.java index bbc8fd25b7593..f0182a4e69898 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/AbstractThirdPartyRepositoryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/AbstractThirdPartyRepositoryTestCase.java @@ -31,7 +31,9 @@ import org.elasticsearch.threadpool.ThreadPool; import java.io.ByteArrayInputStream; +import java.io.EOFException; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -316,6 +318,31 @@ public void testReadFromPositionWithLength() { } } + public void testSkipBeyondBlobLengthShouldThrowEOFException() throws IOException { + final var blobName = randomIdentifier(); + final int blobLength = randomIntBetween(100, 2_000); + final var blobBytes = randomBytesReference(blobLength); + + final var repository = getRepository(); + executeOnBlobStore(repository, blobStore -> { + blobStore.writeBlob(randomPurpose(), blobName, blobBytes, true); + return null; + }); + + var blobContainer = repository.blobStore().blobContainer(repository.basePath()); + try 
(var input = blobContainer.readBlob(randomPurpose(), blobName, 0, blobLength); var output = new BytesStreamOutput()) { + Streams.copy(input, output, false); + expectThrows(EOFException.class, () -> input.skipNBytes(randomLongBetween(1, 1000))); + } + + try (var input = blobContainer.readBlob(randomPurpose(), blobName, 0, blobLength); var output = new BytesStreamOutput()) { + final int capacity = between(1, blobLength); + final ByteBuffer byteBuffer = randomBoolean() ? ByteBuffer.allocate(capacity) : ByteBuffer.allocateDirect(capacity); + Streams.read(input, byteBuffer, capacity); + expectThrows(EOFException.class, () -> input.skipNBytes((blobLength - capacity) + randomLongBetween(1, 1000))); + } + } + protected void testReadFromPositionLargerThanBlobLength(Predicate responseCodeChecker) { final var blobName = randomIdentifier(); final var blobBytes = randomBytesReference(randomIntBetween(100, 2_000)); From 7df1b06525d574dcc250736bb506fbb058fd8c9f Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 17 Jul 2024 16:11:55 +0200 Subject: [PATCH 20/65] ESQL: Correctly compute Rename's output (#110968) Calling Rename.output() previously returned wrong results. Since #110488, instead it throws an IllegalStateException. That leads to test failures in the EsqlNodeSubclassTests because e.g. MvExpandExec and FieldExtractExec eagerly calls .output() on its child when it's being constructed, and the child can be a fragment containing a Rename. --- .../xpack/esql/core/rule/Rule.java | 4 +-- .../xpack/esql/analysis/Analyzer.java | 27 ++++++++++++++----- .../xpack/esql/plan/logical/Rename.java | 9 +++++-- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java index 6121c9b36442b..163b1f89f2abb 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java @@ -6,8 +6,8 @@ */ package org.elasticsearch.xpack.esql.core.rule; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import org.elasticsearch.xpack.esql.core.tree.Node; import org.elasticsearch.xpack.esql.core.util.ReflectionUtils; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index a691f88f29f99..0fec74bf5d7c6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.compute.data.Block; +import org.elasticsearch.logging.Logger; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.Column; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; @@ -383,7 +384,7 @@ private LocalRelation tableMapAsRelation(Source source, Map mapT } } - private static class ResolveRefs extends BaseAnalyzerRule { + public static class ResolveRefs extends BaseAnalyzerRule { @Override protected LogicalPlan doRule(LogicalPlan plan) { if (plan.childrenResolved() == false) { 
@@ -575,20 +576,28 @@ private LogicalPlan resolveLookup(Lookup l, List childrenOutput) { } private Attribute maybeResolveAttribute(UnresolvedAttribute ua, List childrenOutput) { + return maybeResolveAttribute(ua, childrenOutput, log); + } + + private static Attribute maybeResolveAttribute(UnresolvedAttribute ua, List childrenOutput, Logger logger) { if (ua.customMessage()) { return ua; } - return resolveAttribute(ua, childrenOutput); + return resolveAttribute(ua, childrenOutput, logger); } private Attribute resolveAttribute(UnresolvedAttribute ua, List childrenOutput) { + return resolveAttribute(ua, childrenOutput, log); + } + + private static Attribute resolveAttribute(UnresolvedAttribute ua, List childrenOutput, Logger logger) { Attribute resolved = ua; var named = resolveAgainstList(ua, childrenOutput); // if resolved, return it; otherwise keep it in place to be resolved later if (named.size() == 1) { resolved = named.get(0); - if (log.isTraceEnabled() && resolved.resolved()) { - log.trace("Resolved {} to {}", ua, resolved); + if (logger != null && logger.isTraceEnabled() && resolved.resolved()) { + logger.trace("Resolved {} to {}", ua, resolved); } } else { if (named.size() > 0) { @@ -724,6 +733,12 @@ private LogicalPlan resolveDrop(Drop drop, List childOutput) { } private LogicalPlan resolveRename(Rename rename, List childrenOutput) { + List projections = projectionsForRename(rename, childrenOutput, log); + + return new EsqlProject(rename.source(), rename.child(), projections); + } + + public static List projectionsForRename(Rename rename, List childrenOutput, Logger logger) { List projections = new ArrayList<>(childrenOutput); int renamingsCount = rename.renamings().size(); @@ -736,7 +751,7 @@ private LogicalPlan resolveRename(Rename rename, List childrenOutput) // remove attributes overwritten by a renaming: `| keep a, b, c | rename a as b` projections.removeIf(x -> x.name().equals(alias.name())); - var resolved = maybeResolveAttribute(ua, childrenOutput); + var resolved = maybeResolveAttribute(ua, childrenOutput, logger); if (resolved instanceof UnsupportedAttribute || resolved.resolved()) { var realiased = (NamedExpression) alias.replaceChildren(List.of(resolved)); projections.replaceAll(x -> x.equals(resolved) ? realiased : x); @@ -779,7 +794,7 @@ private LogicalPlan resolveRename(Rename rename, List childrenOutput) // add unresolved renamings to later trip the Verifier. 
projections.addAll(unresolved); - return new EsqlProject(rename.source(), rename.child(), projections); + return projections; } private LogicalPlan resolveEnrich(Enrich enrich, List childrenOutput) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java index f0b38d281474e..29ee7f0504c70 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java @@ -7,8 +7,11 @@ package org.elasticsearch.xpack.esql.plan.logical; +import org.elasticsearch.xpack.esql.analysis.Analyzer.ResolveRefs; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; @@ -31,8 +34,10 @@ public List renamings() { @Override public List output() { - // Rename is mapped to a Project during analysis; we do not compute the output here. - throw new IllegalStateException("Should never reach here."); + // Normally shouldn't reach here, as Rename only exists before resolution. + List projectionsAfterResolution = ResolveRefs.projectionsForRename(this, this.child().output(), null); + + return Expressions.asAttributes(projectionsAfterResolution); } @Override From d7b272e1455bf611d0ab4e23c3796cfcf688bdd0 Mon Sep 17 00:00:00 2001 From: Artem Prigoda Date: Wed, 17 Jul 2024 16:31:07 +0200 Subject: [PATCH 21/65] Update comments in IndexShardTests#testScheduledRefresh (#110943) * First scheduledRefresh returns false because search is idle * Remove the comment about the inability to control the result of scheduleRefresh Follow-up for #110312 --- .../java/org/elasticsearch/index/shard/IndexShardTests.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index f1b4b10405f3c..142c03cdfa053 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -3944,8 +3944,8 @@ public void testScheduledRefresh() throws Exception { }); latch.await(); - // Index a document while shard is search active and ensure scheduleRefresh(...) makes documen visible: - logger.info("--> index doc while shard search active"); + // Index a document while shard is search is idle and ensure scheduleRefresh(...) returns false: + logger.info("--> index doc while shard search is idle"); indexDoc(primary, "_doc", "2", "{\"foo\" : \"bar\"}"); logger.info("--> scheduledRefresh(future4)"); PlainActionFuture future4 = new PlainActionFuture<>(); @@ -3962,8 +3962,6 @@ public void testScheduledRefresh() throws Exception { long externalRefreshesBefore = primary.refreshStats().getExternalTotal(); logger.info("--> scheduledRefresh(future5)"); primary.scheduledRefresh(ActionListener.noop()); - // We can't check whether scheduledRefresh returns true because it races with a potential - // refresh triggered by the flush. We just check that one the refreshes ultimately wins. 
assertBusy(() -> assertThat(primary.refreshStats().getExternalTotal(), equalTo(externalRefreshesBefore + 1))); try (Engine.Searcher searcher = primary.acquireSearcher("test")) { assertEquals(3, searcher.getIndexReader().numDocs()); From fab6edaebc2fe0e7abb5fa1aec9a7be47bae8ce4 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 17 Jul 2024 10:34:23 -0400 Subject: [PATCH 22/65] ESQL: Expand main javadoc (#110941) This expands the package level javadoc for ESQL to reference a few more things and include some of our design rules of thumb. It also breaks it out into more different sections to line up better with including more stuff. --- .../xpack/esql/package-info.java | 130 +++++++++++++++++- 1 file changed, 123 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/package-info.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/package-info.java index 0d45ce10b1966..2476ea2363e43 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/package-info.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/package-info.java @@ -6,7 +6,97 @@ */ /** - * ES|QL Overview and Documentation Links + * The ES|QL query language. + * + *

+ * <h2>Overview</h2>

+ * ES|QL is a typed query language which consists of many small languages separated by the {@code |} + * character. Like this: + * + *
{@code
+ *   FROM foo
+ * | WHERE a > 1
+ * | STATS m=MAX(j)
+ * | SORT m ASC
+ * | LIMIT 10
+ * }
+ * + *

+ * Here the {@code FROM}, {@code WHERE}, {@code STATS}, {@code SORT}, and {@code LIMIT} keywords + * enable the mini-language for selecting indices, filtering documents, calculating aggregates, + * sorting results, and limiting the number of results respectively. + *
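For readers who want to try a query like the example above, the following is a minimal sketch of submitting it over HTTP with the low-level Java REST client and the /_query endpoint. The host, the index name ("foo") and the field names are assumptions carried over from the example, purely for illustration.

    import org.apache.http.HttpHost;
    import org.elasticsearch.client.Request;
    import org.elasticsearch.client.Response;
    import org.elasticsearch.client.RestClient;

    public class EsqlQueryExample {
        public static void main(String[] args) throws Exception {
            // Assumes a local cluster listening on 9200 with an index named "foo".
            try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
                Request request = new Request("POST", "/_query");
                request.setJsonEntity("""
                    {
                      "query": "FROM foo | WHERE a > 1 | STATS m = MAX(j) | SORT m ASC | LIMIT 10"
                    }
                    """);
                Response response = client.performRequest(request);
                System.out.println(response.getStatusLine());
            }
        }
    }

The response body is columnar JSON (a "columns" array plus a "values" array); the point here is only that each pipe-separated section of the query maps onto one of the keywords described above.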

+ * + *

+ * <h2>Language Design Goals</h2>

+ * In designing ES|QL we have some principles and rules of thumb: + *
    + *
  • Don't waste people's time
  • + *
  • Progress over perfection
  • + *
  • Design for Elasticsearch
  • + *
  • Be inspired by the best
  • + *
+ * + *

+ * <h2>Don't waste people's time</h2>

+ *
    + *
  • Queries should not fail at runtime. Instead we should return a + * {@link org.elasticsearch.xpack.esql.expression.function.Warnings warning} and {@code null}.
  • + *
  • It is ok to fail a query up front at analysis time. Just not after it's + * started.
  • + *
  • It is better if things can be made to work.
  • + *
  • But genuinely confusing requests require the query writer to make a choice.
  • + *
+ *

+ * As you can see this is a real tightrope, but we try to follow the rules above in order. Examples, with a small illustrative sketch after the list: + *

+ *
    + *
  • If {@link org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime TO_DATETIME} + * receives an invalid date at runtime, it emits a WARNING.
  • + *
  • If {@link org.elasticsearch.xpack.esql.expression.function.scalar.date.DateExtract DATE_EXTRACT} + * receives an invalid extract configuration at query parsing time it fails to start the query.
  • + *
  • {@link org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add 1 + 3.2} + * promotes both sides to a {@code double}.
  • + *
  • {@link org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add 1 + "32"} + * fails at query compile time and the query writer must decide to either write + * {@code CONCAT(TO_STRING(1), "32")} or {@code 1 + TO_INT("32")}.
  • + *
+ * + *
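A small, self-contained sketch of the "warning and null" convention from the rules above. It deliberately does not use the real Warnings class; the method name and the warning text are invented for illustration only.

    import java.time.Instant;
    import java.time.format.DateTimeParseException;
    import java.util.ArrayList;
    import java.util.List;

    public class WarnAndReturnNullSketch {
        // Instead of failing the whole query, record a warning and give this row a null value.
        static Long toDateTimeOrNull(String value, List<String> warnings) {
            try {
                return Instant.parse(value).toEpochMilli();
            } catch (DateTimeParseException e) {
                warnings.add("evaluation of TO_DATETIME failed: " + e.getMessage());
                return null;
            }
        }

        public static void main(String[] args) {
            List<String> warnings = new ArrayList<>();
            System.out.println(toDateTimeOrNull("2024-07-17T00:00:00Z", warnings)); // prints the epoch millis
            System.out.println(toDateTimeOrNull("not a date", warnings));           // prints null and records a warning
            System.out.println(warnings);
        }
    }

The analysis-time failures in the list (like 1 + "32") are the opposite case: the query never starts, so there is no partial result to attach a warning to.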

+ * <h2>Progress over perfection</h2>

+ *
    + *
  • Stability is super important for released features.
  • + *
  • But we need to experiment and get feedback. So mark features {@code experimental} when + * there's any question about how they should work.
  • + *
  • Experimental features shouldn't live forever because folks will get tired of waiting + * and use them in production anyway. We don't officially support them in production but + * we will feel bad if they break.
  • + *
+ * + *

+ * <h2>Design for Elasticsearch</h2>

+ * We must design the language for Elasticsearch, celebrating its advantages + * and smoothing out its quirks (a small doc_values illustration follows the list below). + *
    + *
  • {@link org.elasticsearch.index.fielddata doc_values} sometimes sorts field values and + * sometimes sorts and removes duplicates. We couldn't hide this even if we wanted to, and + * most folks are ok with it. ES|QL has to be useful in those cases.
  • + *
  • Multivalued fields are very easy to index in Elasticsearch so they should be easy to + * read in ES|QL. They should be easy to work with in ES|QL too, but we + * haven't gotten that far yet.
  • + *
+ * + *
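A minimal illustration of the doc_values point above, in plain Java rather than Elasticsearch code: values of a multivalued keyword field read back from doc_values appear sorted and deduplicated, so the order in which they were indexed is not preserved.

    import java.util.List;
    import java.util.TreeSet;

    public class DocValuesOrderSketch {
        // Mimics what a reader of keyword doc_values observes for a multivalued field.
        static List<String> asReadFromDocValues(List<String> indexedValues) {
            return List.copyOf(new TreeSet<>(indexedValues));
        }

        public static void main(String[] args) {
            // Indexed as ["b", "a", "b"], read back as ["a", "b"].
            System.out.println(asReadFromDocValues(List.of("b", "a", "b")));
        }
    }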

+ * <h2>Be inspired by the best</h2>

+ * We'll frequently have lots of different choices on how to implement a feature. We should talk + * and figure out the best way for us, especially considering Elasticsearch's advantages and quirks. + * But we should also look to our data-access-forebears: + *
    + *
  • PostgreSQL is the + * GOAT SQL implementation. It's a joy + * to use for everything but dates. Use DB Fiddle + * to link to syntax examples.
  • + *
  • Oracle + * is pretty good about dates. It's fine about a lot of things but PostgreSQL is better.
  • + *
  • MS SQL Server + * has a silly name but its documentation is wonderful.
  • + *
  • SPL + * is super familiar to our users and it's a piped query language.
  • + *
* *

 * <h2>Major Components</h2>

*
    @@ -25,16 +115,42 @@ * but see also {@link org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper} *
* - *
  • org.elasticsearch.compute.gen - ES|QL generates code for evaluators, which are type-specific implementations of - * functions, designed to run over a {@link org.elasticsearch.compute.data.Block}
  • - *
  • {@link org.elasticsearch.xpack.esql.session.EsqlSession} - manages state across a query
  • - *
  • {@link org.elasticsearch.xpack.esql.expression.function.scalar} - Guide to writing scalar functions
  • - *
  • {@link org.elasticsearch.xpack.esql.expression.function.aggregate} - Guide to writing aggregation functions
  • + *
  • {@link org.elasticsearch.xpack.esql.session.EsqlSession} - Manages state across a query
  • *
  • {@link org.elasticsearch.xpack.esql.analysis.Analyzer} - The first step in query processing
  • + *
  • {@link org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry} - Resolves function names to + * function implementations.
  • *
  • {@link org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer} - Coordinator level logical optimizations
  • *
  • {@link org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer} - Data node level logical optimizations
  • - *
  • {@link org.elasticsearch.xpack.esql.action.RestEsqlQueryAction} - REST API entry point
  • + *
  • {@link org.elasticsearch.xpack.esql.action.RestEsqlQueryAction Sync} and + * {@link org.elasticsearch.xpack.esql.action.RestEsqlAsyncQueryAction async} HTTP API entry points
  • * + *

+ * <h3>Guides</h3>

    + *
      + *
    • {@link org.elasticsearch.xpack.esql.expression.function.scalar Writing scalar functions}
    • + *
    • {@link org.elasticsearch.xpack.esql.expression.function.aggregate Writing aggregation functions}
    • + *
    + * + *

+ * <h3>Code generation</h3>

    + * ES|QL uses two kinds of code generation, which it uses mostly to + * monomorphize tight loops. That process would + * require a lot of copy-and-paste with small tweaks, and some of us have copy-and-paste blindness, so instead + * we use code generation (a small hand-written illustration of such type-specialized loops follows the list below). + *
      + *
    1. When possible we use StringTemplate to build + * Java files. These files typically look like {@code X-Blah.java.st} and are typically used for things + * like the different {@link org.elasticsearch.compute.data.Block} types and their subclasses and + * aggregation state. The templates themselves are easy to read and edit. This process is appropriate + * for cases where you just have to copy and paste something and change a few lines here and there. See + * {@code build.gradle} for the code generators.
    2. + *
    3. When that doesn't work, we use + * + * Annotation processing and JavaPoet to build the Java files. + * These files are typically the inner loops for {@link org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator} + * or {@link org.elasticsearch.compute.aggregation.AggregatorFunction}. The code generation is much more difficult + * to write and debug but much, much, much, much more flexible. The degree of control we have during this + * code generation is amazing but it is much harder to debug failures. See files in + * {@code org.elasticsearch.compute.gen} for the code generators.
    4. + *
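To make "monomorphize tight loops" concrete, here is a small hand-written illustration in plain Java. It is not the generated sources and not the real evaluator classes; it only shows why the generated code ends up containing one near-identical copy of a loop per primitive type, which is exactly the copy-and-paste the templates and the annotation processor exist to avoid.

    public class MonomorphizedLoops {
        // The int-specialized copy of the loop...
        static int maxOfInts(int[] values) {
            int max = Integer.MIN_VALUE;
            for (int v : values) {
                max = Math.max(max, v);
            }
            return max;
        }

        // ...and the double-specialized copy. Multiply this by every type and every function
        // and hand-writing it stops being practical, which is why it is generated instead.
        static double maxOfDoubles(double[] values) {
            double max = Double.NEGATIVE_INFINITY;
            for (double v : values) {
                max = Math.max(max, v);
            }
            return max;
        }

        public static void main(String[] args) {
            System.out.println(maxOfInts(new int[] { 1, 7, 3 }));
            System.out.println(maxOfDoubles(new double[] { 1.5, 0.25 }));
        }
    }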
    */ package org.elasticsearch.xpack.esql; From 8a719346574b43edb4594cd2b10b35652e581d7d Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 17 Jul 2024 11:11:55 -0400 Subject: [PATCH 23/65] AwaitsFix: https://github.com/elastic/elasticsearch/issues/110977 --- muted-tests.yml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 13f9884c251f0..ab31ff400b7b9 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -4,7 +4,8 @@ tests: method: "testGuessIsDayFirstFromLocale" - class: "org.elasticsearch.test.rest.ClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/108857" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ + \ dependent mappings / dates}" - class: "org.elasticsearch.upgrades.SearchStatesIT" issue: "https://github.com/elastic/elasticsearch/issues/108991" method: "testCanMatch" @@ -13,7 +14,8 @@ tests: method: "testTrainedModelInference" - class: "org.elasticsearch.xpack.security.CoreWithSecurityClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109188" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ + \ dependent mappings / dates}" - class: "org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT" issue: "https://github.com/elastic/elasticsearch/issues/109189" method: "test {p0=esql/70_locale/Date format with Italian locale}" @@ -28,7 +30,8 @@ tests: method: "testTimestampFieldTypeExposedByAllIndicesServices" - class: "org.elasticsearch.analysis.common.CommonAnalysisClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109318" - method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling (too complex pattern)}" + method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling\ + \ (too complex pattern)}" - class: "org.elasticsearch.xpack.ml.integration.ClassificationHousePricingIT" issue: "https://github.com/elastic/elasticsearch/issues/101598" method: "testFeatureImportanceValues" @@ -97,6 +100,10 @@ tests: - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT method: "test {stats.Count_or_null SYNC #2}" issue: https://github.com/elastic/elasticsearch/issues/110950 +- class: "org.elasticsearch.xpack.esql.querydsl.query.SingleValueQueryTests" + issue: "https://github.com/elastic/elasticsearch/issues/110977" + method: "testNotMatchSome {p0=StandardSetup[fieldType=keyword, multivaluedField=true,\ + \ empty=true, count=100]}" # Examples: # From 13367d0032311beb57ef5cc89bf13d222389acb4 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 17 Jul 2024 17:22:47 +0200 Subject: [PATCH 24/65] Further speedups to can-match (#110909) Most importantly this one removes spurious synchronization. Also, saves some more allocations and indirections in another sorting spot. 
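The sorting change in this commit replaces a boxed IntStream sort with sorting a list of shard indices by a possibly-missing per-shard key and only then materializing the shard list. Below is a simplified, self-contained sketch of that pattern; the Shard record, the method name and the nulls-last choice are stand-ins for illustration, not the real SearchShardIterator or MinAndMax classes.

    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.List;

    public class SortShardsByKeySketch {
        record Shard(String name) {}

        // Sort shards ascending by key; shards with no key (null) go last.
        static List<Shard> sortByMin(List<Shard> shards, Long[] minPerShard) {
            Comparator<Long> ascNullsLast = (left, right) -> {
                if (left == null) {
                    return right == null ? 0 : 1;
                }
                return right == null ? -1 : left.compareTo(right);
            };
            List<Integer> order = new ArrayList<>(shards.size());
            for (int i = 0; i < shards.size(); i++) {
                order.add(i);
            }
            order.sort((a, b) -> ascNullsLast.compare(minPerShard[a], minPerShard[b]));
            List<Shard> sorted = new ArrayList<>(shards.size());
            for (int idx : order) {
                sorted.add(shards.get(idx));
            }
            return sorted;
        }

        public static void main(String[] args) {
            List<Shard> shards = List.of(new Shard("s0"), new Shard("s1"), new Shard("s2"));
            Long[] mins = { 30L, null, 10L };
            System.out.println(sortByMin(shards, mins)); // [Shard[name=s2], Shard[name=s0], Shard[name=s1]]
        }
    }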
--- .../search/CanMatchPreFilterSearchPhase.java | 52 +++++++++---------- .../elasticsearch/search/sort/MinAndMax.java | 25 ++++++--- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java b/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java index fac43abdedbdb..30460593849c5 100644 --- a/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java @@ -22,7 +22,6 @@ import org.elasticsearch.search.CanMatchShardResponse; import org.elasticsearch.search.SearchService; import org.elasticsearch.search.SearchShardTarget; -import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.internal.AliasFilter; import org.elasticsearch.search.internal.ShardSearchRequest; import org.elasticsearch.search.sort.FieldSortBuilder; @@ -43,7 +42,6 @@ import java.util.concurrent.atomic.AtomicReferenceArray; import java.util.function.BiFunction; import java.util.stream.Collectors; -import java.util.stream.IntStream; import java.util.stream.Stream; import static org.elasticsearch.core.Strings.format; @@ -448,7 +446,11 @@ private static final class CanMatchSearchPhaseResults extends SearchPhaseResults @Override void consumeResult(CanMatchShardResponse result, Runnable next) { try { - consumeResult(result.getShardIndex(), result.canMatch(), result.estimatedMinAndMax()); + final boolean canMatch = result.canMatch(); + final MinAndMax minAndMax = result.estimatedMinAndMax(); + if (canMatch || minAndMax != null) { + consumeResult(result.getShardIndex(), canMatch, minAndMax); + } } finally { next.run(); } @@ -465,7 +467,7 @@ void consumeShardFailure(int shardIndex) { consumeResult(shardIndex, true, null); } - synchronized void consumeResult(int shardIndex, boolean canMatch, MinAndMax minAndMax) { + private synchronized void consumeResult(int shardIndex, boolean canMatch, MinAndMax minAndMax) { if (canMatch) { possibleMatches.set(shardIndex); numPossibleMatches++; @@ -494,10 +496,9 @@ private GroupShardsIterator getIterator( CanMatchSearchPhaseResults results, GroupShardsIterator shardsIts ) { - int cardinality = results.getNumPossibleMatches(); FixedBitSet possibleMatches = results.getPossibleMatches(); // TODO: pick the local shard when possible - if (requireAtLeastOneMatch && cardinality == 0) { + if (requireAtLeastOneMatch && results.getNumPossibleMatches() == 0) { // this is a special case where we have no hit but we need to get at least one search response in order // to produce a valid search result with all the aggs etc. 
// Since it's possible that some of the shards that we're skipping are @@ -514,7 +515,6 @@ private GroupShardsIterator getIterator( } possibleMatches.set(shardIndexToQuery); } - SearchSourceBuilder source = request.source(); int i = 0; for (SearchShardIterator iter : shardsIts) { iter.reset(); @@ -528,7 +528,7 @@ private GroupShardsIterator getIterator( if (shouldSortShards(results.minAndMaxes) == false) { return shardsIts; } - FieldSortBuilder fieldSort = FieldSortBuilder.getPrimaryFieldSortOrNull(source); + FieldSortBuilder fieldSort = FieldSortBuilder.getPrimaryFieldSortOrNull(request.source()); return new GroupShardsIterator<>(sortShards(shardsIts, results.minAndMaxes, fieldSort.order())); } @@ -537,11 +537,24 @@ private static List sortShards( MinAndMax[] minAndMaxes, SortOrder order ) { - return IntStream.range(0, shardsIts.size()) - .boxed() - .sorted(shardComparator(shardsIts, minAndMaxes, order)) - .map(shardsIts::get) - .toList(); + int bound = shardsIts.size(); + List toSort = new ArrayList<>(bound); + for (int i = 0; i < bound; i++) { + toSort.add(i); + } + Comparator> keyComparator = forciblyCast(MinAndMax.getComparator(order)); + toSort.sort((idx1, idx2) -> { + int res = keyComparator.compare(minAndMaxes[idx1], minAndMaxes[idx2]); + if (res != 0) { + return res; + } + return shardsIts.get(idx1).compareTo(shardsIts.get(idx2)); + }); + List list = new ArrayList<>(bound); + for (Integer integer : toSort) { + list.add(shardsIts.get(integer)); + } + return list; } private static boolean shouldSortShards(MinAndMax[] minAndMaxes) { @@ -559,17 +572,4 @@ private static boolean shouldSortShards(MinAndMax[] minAndMaxes) { return clazz != null; } - private static Comparator shardComparator( - GroupShardsIterator shardsIts, - MinAndMax[] minAndMaxes, - SortOrder order - ) { - final Comparator comparator = Comparator.comparing( - index -> minAndMaxes[index], - forciblyCast(MinAndMax.getComparator(order)) - ); - - return comparator.thenComparing(shardsIts::get); - } - } diff --git a/server/src/main/java/org/elasticsearch/search/sort/MinAndMax.java b/server/src/main/java/org/elasticsearch/search/sort/MinAndMax.java index 7c29f52f33847..88c9f766d536c 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/MinAndMax.java +++ b/server/src/main/java/org/elasticsearch/search/sort/MinAndMax.java @@ -55,16 +55,27 @@ public T getMax() { return maxValue; } + @SuppressWarnings({ "unchecked", "rawtypes" }) + private static final Comparator ASC_COMPARATOR = (left, right) -> { + if (left == null) { + return right == null ? 0 : -1; // nulls last + } + return right == null ? -1 : left.getMin().compareTo(right.getMin()); + }; + + @SuppressWarnings({ "unchecked", "rawtypes" }) + private static final Comparator DESC_COMPARATOR = (left, right) -> { + if (left == null) { + return right == null ? 0 : 1; // nulls first + } + return right == null ? 1 : right.getMax().compareTo(left.getMax()); + }; + /** * Return a {@link Comparator} for {@link MinAndMax} values according to the provided {@link SortOrder}. */ + @SuppressWarnings({ "unchecked", "rawtypes" }) public static > Comparator> getComparator(SortOrder order) { - Comparator> cmp = order == SortOrder.ASC - ? Comparator.comparing(MinAndMax::getMin) - : Comparator.comparing(MinAndMax::getMax); - if (order == SortOrder.DESC) { - cmp = cmp.reversed(); - } - return Comparator.nullsLast(cmp); + return (Comparator) (order == SortOrder.ASC ? 
ASC_COMPARATOR : DESC_COMPARATOR); } } From e82671bc4b545494192ed237998ce2f4440a7be2 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 18 Jul 2024 02:04:53 +1000 Subject: [PATCH 25/65] Mute org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT #110978 --- muted-tests.yml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index ab31ff400b7b9..ac707ed9e5b37 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -4,8 +4,7 @@ tests: method: "testGuessIsDayFirstFromLocale" - class: "org.elasticsearch.test.rest.ClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/108857" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ - \ dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" - class: "org.elasticsearch.upgrades.SearchStatesIT" issue: "https://github.com/elastic/elasticsearch/issues/108991" method: "testCanMatch" @@ -14,8 +13,7 @@ tests: method: "testTrainedModelInference" - class: "org.elasticsearch.xpack.security.CoreWithSecurityClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109188" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ - \ dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" - class: "org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT" issue: "https://github.com/elastic/elasticsearch/issues/109189" method: "test {p0=esql/70_locale/Date format with Italian locale}" @@ -30,8 +28,7 @@ tests: method: "testTimestampFieldTypeExposedByAllIndicesServices" - class: "org.elasticsearch.analysis.common.CommonAnalysisClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109318" - method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling\ - \ (too complex pattern)}" + method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling (too complex pattern)}" - class: "org.elasticsearch.xpack.ml.integration.ClassificationHousePricingIT" issue: "https://github.com/elastic/elasticsearch/issues/101598" method: "testFeatureImportanceValues" @@ -102,8 +99,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/110950 - class: "org.elasticsearch.xpack.esql.querydsl.query.SingleValueQueryTests" issue: "https://github.com/elastic/elasticsearch/issues/110977" - method: "testNotMatchSome {p0=StandardSetup[fieldType=keyword, multivaluedField=true,\ - \ empty=true, count=100]}" + method: "testNotMatchSome {p0=StandardSetup[fieldType=keyword, multivaluedField=true, empty=true, count=100]}" +- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT + issue: https://github.com/elastic/elasticsearch/issues/110978 # Examples: # From f102826d87c683fdf10f7037b1a0b6d5756c4de5 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 18 Jul 2024 02:08:24 +1000 Subject: [PATCH 26/65] Mute org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests testInfoParameters {class org.elasticsearch.xpack.esql.plan.physical.MvExpandExec} #110980 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 
ac707ed9e5b37..6eadb31cc0c68 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -102,6 +102,9 @@ tests: method: "testNotMatchSome {p0=StandardSetup[fieldType=keyword, multivaluedField=true, empty=true, count=100]}" - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT issue: https://github.com/elastic/elasticsearch/issues/110978 +- class: org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests + method: testInfoParameters {class org.elasticsearch.xpack.esql.plan.physical.MvExpandExec} + issue: https://github.com/elastic/elasticsearch/issues/110980 # Examples: # From 870c3ca62a94aca9e4ada460a4843e9e56879de0 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 18 Jul 2024 02:08:51 +1000 Subject: [PATCH 27/65] Mute org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests testInfoParameters {class org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec} #110981 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 6eadb31cc0c68..44d21b55ce3ff 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -105,6 +105,9 @@ tests: - class: org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests method: testInfoParameters {class org.elasticsearch.xpack.esql.plan.physical.MvExpandExec} issue: https://github.com/elastic/elasticsearch/issues/110980 +- class: org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests + method: testInfoParameters {class org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec} + issue: https://github.com/elastic/elasticsearch/issues/110981 # Examples: # From 22005952c6fe2643393117a9c5f4ff639d497e6b Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 17 Jul 2024 19:52:58 +0200 Subject: [PATCH 28/65] Adding minimal docs around using index mode logs. (#110932) This adds minimal docs around how to the new logs index mode for data streams (most common use case). This is minimal because logs index mode is still in tech preview. Minimal docs should allow any interested users to experiment with the new logs index mode. --- .../data-streams/data-streams.asciidoc | 1 + docs/reference/data-streams/logs.asciidoc | 52 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 docs/reference/data-streams/logs.asciidoc diff --git a/docs/reference/data-streams/data-streams.asciidoc b/docs/reference/data-streams/data-streams.asciidoc index 9c7137563caef..1484e21febdb3 100644 --- a/docs/reference/data-streams/data-streams.asciidoc +++ b/docs/reference/data-streams/data-streams.asciidoc @@ -157,4 +157,5 @@ include::set-up-a-data-stream.asciidoc[] include::use-a-data-stream.asciidoc[] include::change-mappings-and-settings.asciidoc[] include::tsds.asciidoc[] +include::logs.asciidoc[] include::lifecycle/index.asciidoc[] diff --git a/docs/reference/data-streams/logs.asciidoc b/docs/reference/data-streams/logs.asciidoc new file mode 100644 index 0000000000000..a2d8b6776e052 --- /dev/null +++ b/docs/reference/data-streams/logs.asciidoc @@ -0,0 +1,52 @@ +[[logs-data-stream]] +== Logs data stream + +preview::[Logs data streams and the logs index mode are in tech preview and may be changed or removed in the future. Don't use logs data streams or logs index mode in production.] + +A logs data stream is a data stream type that stores log data more efficiently. + +In benchmarks, log data stored in a logs data stream used ~2.5 times less disk space than a regular data +stream. The exact impact will vary depending on your data set. 
+ +The following features are enabled in a logs data stream: + +* <>, which omits storing the `_source` field. When the document source is requested, it is synthesized from document fields upon retrieval. + +* Index sorting. This yields a lower storage footprint. By default indices are sorted by `host.name` and `@timestamp` fields at index time. + +* More space efficient compression for fields with <> enabled. + +[discrete] +[[how-to-use-logsds]] +=== Create a logs data stream + +To create a logs data stream, set your index template `index.mode` to `logs`: + +[source,console] +---- +PUT _index_template/my-index-template +{ + "index_patterns": ["logs-*"], + "data_stream": { }, + "template": { + "settings": { + "index.mode": "logs" <1> + } + }, + "priority": 101 <2> +} +---- +// TEST + +<1> The index mode setting. +<2> The index template priority. By default, Elasticsearch ships with an index template with a `logs-*-*` pattern with a priority of 100. You need to define a priority higher than 100 to ensure that this index template gets selected over the default index template for the `logs-*-*` pattern. See the <> for more information. + +After the index template is created, new indices that use the template will be configured as a logs data stream. You can start indexing data and <>. + +//// +[source,console] +---- +DELETE _index_template/my-index-template +---- +// TEST[continued] +//// From db8bd66a6fb20c8c98362ffd241cbc0fd4af5fc7 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Wed, 17 Jul 2024 13:59:15 -0400 Subject: [PATCH 29/65] [ESQL] string switch in estimate size (#110946) Move the estimated size out of EstimateRowSize and onto DataType. This eliminates a string switch, and makes it clear that writers of new data types should think about the size when possible. --- .../xpack/esql/core/type/DataType.java | 84 +++++++++++-------- .../esql/core/type/DataTypeConverter.java | 6 +- .../esql/plan/physical/EstimatesRowSize.java | 26 ++---- 3 files changed, 58 insertions(+), 58 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index 503c076b4f7a2..f59af4a1282cc 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java @@ -20,6 +20,7 @@ import java.util.Comparator; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.function.Function; @@ -27,9 +28,9 @@ import static java.util.stream.Collectors.toUnmodifiableMap; public enum DataType { - UNSUPPORTED(builder().typeName("UNSUPPORTED")), - NULL(builder().esType("null")), - BOOLEAN(builder().esType("boolean").size(1)), + UNSUPPORTED(builder().typeName("UNSUPPORTED").unknownSize()), + NULL(builder().esType("null").estimatedSize(0)), + BOOLEAN(builder().esType("boolean").estimatedSize(1)), /** * These are numeric fields labeled as metric counters in time-series indices. Although stored @@ -38,36 +39,40 @@ public enum DataType { * These fields are strictly for use in retrieval from indices, rate aggregation, and casting to their * parent numeric type. 
*/ - COUNTER_LONG(builder().esType("counter_long").size(Long.BYTES).docValues().counter()), - COUNTER_INTEGER(builder().esType("counter_integer").size(Integer.BYTES).docValues().counter()), - COUNTER_DOUBLE(builder().esType("counter_double").size(Double.BYTES).docValues().counter()), - - LONG(builder().esType("long").size(Long.BYTES).wholeNumber().docValues().counter(COUNTER_LONG)), - INTEGER(builder().esType("integer").size(Integer.BYTES).wholeNumber().docValues().counter(COUNTER_INTEGER)), - SHORT(builder().esType("short").size(Short.BYTES).wholeNumber().docValues().widenSmallNumeric(INTEGER)), - BYTE(builder().esType("byte").size(Byte.BYTES).wholeNumber().docValues().widenSmallNumeric(INTEGER)), - UNSIGNED_LONG(builder().esType("unsigned_long").size(Long.BYTES).wholeNumber().docValues()), - DOUBLE(builder().esType("double").size(Double.BYTES).rationalNumber().docValues().counter(COUNTER_DOUBLE)), - FLOAT(builder().esType("float").size(Float.BYTES).rationalNumber().docValues().widenSmallNumeric(DOUBLE)), - HALF_FLOAT(builder().esType("half_float").size(Float.BYTES).rationalNumber().docValues().widenSmallNumeric(DOUBLE)), - SCALED_FLOAT(builder().esType("scaled_float").size(Long.BYTES).rationalNumber().docValues().widenSmallNumeric(DOUBLE)), + COUNTER_LONG(builder().esType("counter_long").estimatedSize(Long.BYTES).docValues().counter()), + COUNTER_INTEGER(builder().esType("counter_integer").estimatedSize(Integer.BYTES).docValues().counter()), + COUNTER_DOUBLE(builder().esType("counter_double").estimatedSize(Double.BYTES).docValues().counter()), + + LONG(builder().esType("long").estimatedSize(Long.BYTES).wholeNumber().docValues().counter(COUNTER_LONG)), + INTEGER(builder().esType("integer").estimatedSize(Integer.BYTES).wholeNumber().docValues().counter(COUNTER_INTEGER)), + SHORT(builder().esType("short").estimatedSize(Short.BYTES).wholeNumber().docValues().widenSmallNumeric(INTEGER)), + BYTE(builder().esType("byte").estimatedSize(Byte.BYTES).wholeNumber().docValues().widenSmallNumeric(INTEGER)), + UNSIGNED_LONG(builder().esType("unsigned_long").estimatedSize(Long.BYTES).wholeNumber().docValues()), + DOUBLE(builder().esType("double").estimatedSize(Double.BYTES).rationalNumber().docValues().counter(COUNTER_DOUBLE)), + FLOAT(builder().esType("float").estimatedSize(Float.BYTES).rationalNumber().docValues().widenSmallNumeric(DOUBLE)), + HALF_FLOAT(builder().esType("half_float").estimatedSize(Float.BYTES).rationalNumber().docValues().widenSmallNumeric(DOUBLE)), + SCALED_FLOAT(builder().esType("scaled_float").estimatedSize(Long.BYTES).rationalNumber().docValues().widenSmallNumeric(DOUBLE)), KEYWORD(builder().esType("keyword").unknownSize().docValues()), TEXT(builder().esType("text").unknownSize()), - DATETIME(builder().esType("date").typeName("DATETIME").size(Long.BYTES).docValues()), - IP(builder().esType("ip").size(45).docValues()), - VERSION(builder().esType("version").unknownSize().docValues()), - OBJECT(builder().esType("object")), - NESTED(builder().esType("nested")), + DATETIME(builder().esType("date").typeName("DATETIME").estimatedSize(Long.BYTES).docValues()), + // IP addresses, both IPv4 and IPv6, are encoded using 16 bytes. 
+ IP(builder().esType("ip").estimatedSize(16).docValues()), + // 8.15.2-SNAPSHOT is 15 bytes, most are shorter, some can be longer + VERSION(builder().esType("version").estimatedSize(15).docValues()), + OBJECT(builder().esType("object").unknownSize()), + NESTED(builder().esType("nested").unknownSize()), SOURCE(builder().esType(SourceFieldMapper.NAME).unknownSize()), - DATE_PERIOD(builder().typeName("DATE_PERIOD").size(3 * Integer.BYTES)), - TIME_DURATION(builder().typeName("TIME_DURATION").size(Integer.BYTES + Long.BYTES)), - GEO_POINT(builder().esType("geo_point").size(Double.BYTES * 2).docValues()), - CARTESIAN_POINT(builder().esType("cartesian_point").size(Double.BYTES * 2).docValues()), - CARTESIAN_SHAPE(builder().esType("cartesian_shape").unknownSize().docValues()), - GEO_SHAPE(builder().esType("geo_shape").unknownSize().docValues()), - - DOC_DATA_TYPE(builder().esType("_doc").size(Integer.BYTES * 3)), + DATE_PERIOD(builder().typeName("DATE_PERIOD").estimatedSize(3 * Integer.BYTES)), + TIME_DURATION(builder().typeName("TIME_DURATION").estimatedSize(Integer.BYTES + Long.BYTES)), + // WKB for points is typically 21 bytes. + GEO_POINT(builder().esType("geo_point").estimatedSize(21).docValues()), + CARTESIAN_POINT(builder().esType("cartesian_point").estimatedSize(21).docValues()), + // wild estimate for size, based on some test data (airport_city_boundaries) + CARTESIAN_SHAPE(builder().esType("cartesian_shape").estimatedSize(200).docValues()), + GEO_SHAPE(builder().esType("geo_shape").estimatedSize(200).docValues()), + + DOC_DATA_TYPE(builder().esType("_doc").estimatedSize(Integer.BYTES * 3)), TSID_DATA_TYPE(builder().esType("_tsid").unknownSize().docValues()), PARTIAL_AGG(builder().esType("partial_agg").unknownSize()); @@ -77,7 +82,7 @@ public enum DataType { private final String esType; - private final int size; + private final Optional estimatedSize; /** * True if the type represents a "whole number", as in, does not have a decimal part. @@ -113,10 +118,11 @@ public enum DataType { DataType(Builder builder) { String typeString = builder.typeName != null ? builder.typeName : builder.esType; + assert builder.estimatedSize != null : "Missing size for type " + typeString; this.typeName = typeString.toLowerCase(Locale.ROOT); this.name = typeString.toUpperCase(Locale.ROOT); this.esType = builder.esType; - this.size = builder.size; + this.estimatedSize = builder.estimatedSize; this.isWholeNumber = builder.isWholeNumber; this.isRationalNumber = builder.isRationalNumber; this.docValues = builder.docValues; @@ -282,8 +288,12 @@ public boolean isNumeric() { return isWholeNumber || isRationalNumber; } - public int size() { - return size; + /** + * @return the estimated size, in bytes, of this data type. If there's no reasonable way to estimate the size, + * the optional will be empty. + */ + public Optional estimatedSize() { + return estimatedSize; } public boolean hasDocValues() { @@ -352,7 +362,7 @@ private static class Builder { private String typeName; - private int size; + private Optional estimatedSize; /** * True if the type represents a "whole number", as in, does not have a decimal part. 
@@ -398,13 +408,13 @@ Builder typeName(String typeName) { return this; } - Builder size(int size) { - this.size = size; + Builder estimatedSize(int size) { + this.estimatedSize = Optional.of(size); return this; } Builder unknownSize() { - this.size = Integer.MAX_VALUE; + this.estimatedSize = Optional.empty(); return this; } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java index bd87a92f3289d..e2d6f79a873c9 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java @@ -77,6 +77,8 @@ public static DataType commonType(DataType left, DataType right) { return right; } if (left.isNumeric() && right.isNumeric()) { + int lsize = left.estimatedSize().orElseThrow(); + int rsize = right.estimatedSize().orElseThrow(); // if one is int if (left.isWholeNumber()) { // promote the highest int @@ -84,7 +86,7 @@ public static DataType commonType(DataType left, DataType right) { if (left == UNSIGNED_LONG || right == UNSIGNED_LONG) { return UNSIGNED_LONG; } - return left.size() > right.size() ? left : right; + return lsize > rsize ? left : right; } // promote the rational return right; @@ -94,7 +96,7 @@ public static DataType commonType(DataType left, DataType right) { return left; } // promote the highest rational - return left.size() > right.size() ? left : right; + return lsize > rsize ? left : right; } if (isString(left)) { if (right.isNumeric()) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EstimatesRowSize.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EstimatesRowSize.java index 40c9067efbeda..cfb6cce2579a2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EstimatesRowSize.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EstimatesRowSize.java @@ -103,24 +103,12 @@ public String toString() { static int estimateSize(DataType dataType) { ElementType elementType = PlannerUtils.toElementType(dataType); - return switch (elementType) { - case BOOLEAN -> 1; - case BYTES_REF -> switch (dataType.typeName()) { - case "ip" -> 16; // IP addresses, both IPv4 and IPv6, are encoded using 16 bytes. - case "version" -> 15; // 8.15.2-SNAPSHOT is 15 bytes, most are shorter, some can be longer - case "geo_point", "cartesian_point" -> 21; // WKB for points is typically 21 bytes. - case "geo_shape", "cartesian_shape" -> 200; // wild estimate, based on some test data (airport_city_boundaries) - default -> 50; // wild estimate for the size of a string. 
- }; - case DOC -> throw new EsqlIllegalArgumentException("can't load a [doc] with field extraction"); - case FLOAT -> Float.BYTES; - case DOUBLE -> Double.BYTES; - case INT -> Integer.BYTES; - case LONG -> Long.BYTES; - case NULL -> 0; - // TODO: provide a specific estimate for aggregated_metrics_double - case COMPOSITE -> 50; - case UNKNOWN -> throw new EsqlIllegalArgumentException("[unknown] can't be the result of field extraction"); - }; + if (elementType == ElementType.DOC) { + throw new EsqlIllegalArgumentException("can't load a [doc] with field extraction"); + } + if (elementType == ElementType.UNKNOWN) { + throw new EsqlIllegalArgumentException("[unknown] can't be the result of field extraction"); + } + return dataType.estimatedSize().orElse(50); } } From 28c7cbccce21c2119ae6a33a5f1cd3a5fa18f9c0 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Wed, 17 Jul 2024 15:20:57 -0400 Subject: [PATCH 30/65] Make empty string searches be consistent with case (in)sensitivity (#110833) If we determine that the searchable term is completely empty, we switch back to a regular term query. This way we return the same docs as expected when we do a case sensitive search. closes: #108968 --- docs/changelog/110833.yaml | 5 +++ .../test/search/171_term_query.yml | 37 +++++++++++++++++++ .../index/mapper/TermBasedFieldType.java | 8 +++- 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/110833.yaml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/171_term_query.yml diff --git a/docs/changelog/110833.yaml b/docs/changelog/110833.yaml new file mode 100644 index 0000000000000..008fc489ed731 --- /dev/null +++ b/docs/changelog/110833.yaml @@ -0,0 +1,5 @@ +pr: 110833 +summary: Make empty string searches be consistent with case (in)sensitivity +area: Search +type: bug +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/171_term_query.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/171_term_query.yml new file mode 100644 index 0000000000000..5ab65b0c69e8a --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/171_term_query.yml @@ -0,0 +1,37 @@ +--- +"case insensitive term query on blank keyword is consistent": + - requires: + cluster_features: [ "gte_v8.16.0" ] + reason: "query consistency bug fix in 8.16.0" + - do: + indices.create: + index: index_with_blank_keyword + body: + settings: + number_of_shards: 1 + mappings: + properties: + keyword_field: + type: keyword + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "index_with_blank_keyword", "_id": "1"}}' + - '{"keyword_field": ""}' + + - do: + search: + rest_total_hits_as_int: true + index: index_with_blank_keyword + body: {"query" : {"term" : {"keyword_field" : {"value": ""}}}} + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: index_with_blank_keyword + body: { "query": { "term": { "keyword_field": {"value": "", "case_insensitive": true } } } } + + - match: { hits.total: 1 } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TermBasedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/TermBasedFieldType.java index 80e6d04d967d5..f574e509df9b9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TermBasedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TermBasedFieldType.java @@ -45,7 +45,13 @@ protected BytesRef indexedValueForSearch(Object value) { @Override public 
Query termQueryCaseInsensitive(Object value, SearchExecutionContext context) { failIfNotIndexed(); - return AutomatonQueries.caseInsensitiveTermQuery(new Term(name(), indexedValueForSearch(value))); + final BytesRef valueForSearch = indexedValueForSearch(value); + // check if valueForSearch is the same as an empty string + // if we have a length of zero, just do a regular term query + if (valueForSearch.length == 0) { + return termQuery(value, context); + } + return AutomatonQueries.caseInsensitiveTermQuery(new Term(name(), valueForSearch)); } @Override From d943a1fac4ac84e2cf9eb813f165f67f3f877ba7 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 17 Jul 2024 17:00:28 -0400 Subject: [PATCH 31/65] Fix references to incorrect query rule criteria type (#110994) --- docs/reference/query-rules/apis/put-query-rule.asciidoc | 4 ++-- docs/reference/query-rules/apis/put-query-ruleset.asciidoc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/reference/query-rules/apis/put-query-rule.asciidoc b/docs/reference/query-rules/apis/put-query-rule.asciidoc index 2b9a6ba892b84..9737673be009c 100644 --- a/docs/reference/query-rules/apis/put-query-rule.asciidoc +++ b/docs/reference/query-rules/apis/put-query-rule.asciidoc @@ -70,10 +70,10 @@ Matches all queries, regardless of input. -- - `metadata` (Optional, string) The metadata field to match against. This metadata will be used to match against `match_criteria` sent in the <>. -Required for all criteria types except `global`. +Required for all criteria types except `always`. - `values` (Optional, array of strings) The values to match against the metadata field. Only one value must match for the criteria to be met. -Required for all criteria types except `global`. +Required for all criteria types except `always`. `actions`:: (Required, object) The actions to take when the rule is matched. diff --git a/docs/reference/query-rules/apis/put-query-ruleset.asciidoc b/docs/reference/query-rules/apis/put-query-ruleset.asciidoc index 012060e1004ae..c164e9e140a4e 100644 --- a/docs/reference/query-rules/apis/put-query-ruleset.asciidoc +++ b/docs/reference/query-rules/apis/put-query-ruleset.asciidoc @@ -78,10 +78,10 @@ Matches all queries, regardless of input. -- - `metadata` (Optional, string) The metadata field to match against. This metadata will be used to match against `match_criteria` sent in the <>. -Required for all criteria types except `global`. +Required for all criteria types except `always`. - `values` (Optional, array of strings) The values to match against the metadata field. Only one value must match for the criteria to be met. -Required for all criteria types except `global`. +Required for all criteria types except `always`. Actions depend on the rule type. For `pinned` rules, actions follow the format specified by the <>. From cfbda244b81a47915f3ea1bedfd40287df2fe380 Mon Sep 17 00:00:00 2001 From: Dianna Hohensee Date: Wed, 17 Jul 2024 19:59:38 -0400 Subject: [PATCH 32/65] Add snapshot code comments and couple renames (#110613) Renamed SnapshotsInProgress.Entry.shardsByRepoShardId() and ShardGeneration.toBlobNamePart(), updated some variable names, and generally added/clarified comments throughout the snapshotting code. 
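Below is a minimal sketch, not part of the patch itself, of how a caller reads per-shard status through the renamed accessor. The ShardStatusExample wrapper and countSuccessfulShards helper are hypothetical; the SnapshotsInProgress.Entry type, the shardSnapshotStatusByRepoShardId() accessor and the ShardState values are taken from the diff below.

    import java.util.Map;

    import org.elasticsearch.cluster.SnapshotsInProgress;
    import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus;
    import org.elasticsearch.cluster.SnapshotsInProgress.ShardState;
    import org.elasticsearch.repositories.RepositoryShardId;

    // Hypothetical caller: counts the shards of one in-progress snapshot entry that have
    // already completed successfully, iterating the accessor renamed by this patch from
    // shardsByRepoShardId() to shardSnapshotStatusByRepoShardId().
    class ShardStatusExample {
        static int countSuccessfulShards(SnapshotsInProgress.Entry entry) {
            int successful = 0;
            for (Map.Entry<RepositoryShardId, ShardSnapshotStatus> shard : entry.shardSnapshotStatusByRepoShardId().entrySet()) {
                if (shard.getValue().state() == ShardState.SUCCESS) {
                    successful++;
                }
            }
            return successful;
        }
    }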
--- .../snapshots/CloneSnapshotIT.java | 8 +- .../TransportSnapshotsStatusAction.java | 5 +- .../cluster/RepositoryCleanupInProgress.java | 3 + .../cluster/SnapshotDeletionsInProgress.java | 2 +- .../cluster/SnapshotsInProgress.java | 81 ++++++--- .../cluster/routing/ShardRouting.java | 2 +- .../BlobStoreIndexShardSnapshots.java | 12 +- .../repositories/FinalizeSnapshotContext.java | 5 + .../repositories/Repository.java | 4 +- .../repositories/RepositoryData.java | 3 +- .../repositories/ShardGeneration.java | 4 +- .../blobstore/BlobStoreRepository.java | 20 +-- .../blobstore/GetSnapshotInfoContext.java | 2 +- .../repositories/blobstore/package-info.java | 29 ++-- .../InFlightShardSnapshotStates.java | 3 +- .../elasticsearch/snapshots/SnapshotInfo.java | 5 +- .../snapshots/SnapshotShardsService.java | 9 +- .../snapshots/SnapshotsService.java | 159 ++++++++++-------- .../elasticsearch/snapshots/package-info.java | 14 +- .../snapshots/SnapshotResiliencyTests.java | 2 +- .../snapshots/SnapshotsServiceTests.java | 5 +- .../ESBlobStoreRepositoryIntegTestCase.java | 2 +- 22 files changed, 234 insertions(+), 145 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index a035abb81d7e6..d7c7acf9737a1 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -651,7 +651,7 @@ public void testStartCloneWithSuccessfulShardSnapshotPendingFinalization() throw try { awaitClusterState(clusterState -> { final List entries = SnapshotsInProgress.get(clusterState).forRepo(repoName); - return entries.size() == 2 && entries.get(1).shardsByRepoShardId().isEmpty() == false; + return entries.size() == 2 && entries.get(1).shardSnapshotStatusByRepoShardId().isEmpty() == false; }); assertFalse(blockedSnapshot.isDone()); } finally { @@ -688,9 +688,9 @@ public void testStartCloneDuringRunningDelete() throws Exception { logger.info("--> waiting for snapshot clone to be fully initialized"); awaitClusterState(state -> { for (SnapshotsInProgress.Entry entry : SnapshotsInProgress.get(state).forRepo(repoName)) { - if (entry.shardsByRepoShardId().isEmpty() == false) { + if (entry.shardSnapshotStatusByRepoShardId().isEmpty() == false) { assertEquals(sourceSnapshot, entry.source().getName()); - for (SnapshotsInProgress.ShardSnapshotStatus value : entry.shardsByRepoShardId().values()) { + for (SnapshotsInProgress.ShardSnapshotStatus value : entry.shardSnapshotStatusByRepoShardId().values()) { assertSame(value, SnapshotsInProgress.ShardSnapshotStatus.UNASSIGNED_QUEUED); } return true; @@ -895,7 +895,7 @@ private static BlobStoreIndexShardSnapshots readShardGeneration( return BlobStoreRepository.INDEX_SHARD_SNAPSHOTS_FORMAT.read( repository.getMetadata().name(), repository.shardContainer(repositoryShardId.index(), repositoryShardId.shardId()), - generation.toBlobNamePart(), + generation.getGenerationUUID(), NamedXContentRegistry.EMPTY ); } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java index 28f970eb8c9fe..caedc3363e9a3 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java +++ 
b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java @@ -127,7 +127,7 @@ protected void masterOperation( Set nodesIds = new HashSet<>(); for (SnapshotsInProgress.Entry entry : currentSnapshots) { - for (SnapshotsInProgress.ShardSnapshotStatus status : entry.shardsByRepoShardId().values()) { + for (SnapshotsInProgress.ShardSnapshotStatus status : entry.shardSnapshotStatusByRepoShardId().values()) { if (status.nodeId() != null) { nodesIds.add(status.nodeId()); } @@ -188,7 +188,8 @@ private void buildResponse( for (SnapshotsInProgress.Entry entry : currentSnapshotEntries) { currentSnapshotNames.add(entry.snapshot().getSnapshotId().getName()); List shardStatusBuilder = new ArrayList<>(); - for (Map.Entry shardEntry : entry.shardsByRepoShardId() + for (Map.Entry shardEntry : entry + .shardSnapshotStatusByRepoShardId() .entrySet()) { SnapshotsInProgress.ShardSnapshotStatus status = shardEntry.getValue(); if (status.nodeId() != null) { diff --git a/server/src/main/java/org/elasticsearch/cluster/RepositoryCleanupInProgress.java b/server/src/main/java/org/elasticsearch/cluster/RepositoryCleanupInProgress.java index 2dba73a3ec68f..cc5e71b38ecb2 100644 --- a/server/src/main/java/org/elasticsearch/cluster/RepositoryCleanupInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/RepositoryCleanupInProgress.java @@ -21,6 +21,9 @@ import java.util.Iterator; import java.util.List; +/** + * A repository cleanup request entry. Part of the cluster state. + */ public final class RepositoryCleanupInProgress extends AbstractNamedDiffable implements ClusterState.Custom { public static final RepositoryCleanupInProgress EMPTY = new RepositoryCleanupInProgress(List.of()); diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotDeletionsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotDeletionsInProgress.java index eea89c6ff3714..914bf2d0cdb3e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotDeletionsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotDeletionsInProgress.java @@ -32,7 +32,7 @@ import java.util.Set; /** - * A class that represents the snapshot deletions that are in progress in the cluster. + * Represents the in-progress snapshot deletions in the cluster state. */ public class SnapshotDeletionsInProgress extends AbstractNamedDiffable implements Custom { diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index b6fb370991a93..7b0ab346501f3 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -68,7 +68,7 @@ public class SnapshotsInProgress extends AbstractNamedDiffable implement public static final String ABORTED_FAILURE_TEXT = "Snapshot was aborted by deletion"; - // keyed by repository name + /** Maps repository name to list of snapshots in that repository */ private final Map entries; /** @@ -86,6 +86,9 @@ public class SnapshotsInProgress extends AbstractNamedDiffable implement // INIT state. private final Set nodesIdsForRemoval; + /** + * Returns the SnapshotInProgress metadata present within the given cluster state. 
+ */ public static SnapshotsInProgress get(ClusterState state) { return state.custom(TYPE, EMPTY); } @@ -145,6 +148,9 @@ public SnapshotsInProgress withAddedEntry(Entry entry) { return withUpdatedEntriesForRepo(entry.repository(), forRepo); } + /** + * Returns the list of snapshots in the specified repository. + */ public List forRepo(String repository) { return entries.getOrDefault(repository, ByRepo.EMPTY).entries; } @@ -171,14 +177,18 @@ public Stream asStream() { @Nullable public Entry snapshot(final Snapshot snapshot) { - return findInList(snapshot, forRepo(snapshot.getRepository())); + return findSnapshotInList(snapshot, forRepo(snapshot.getRepository())); } + /** + * Searches for a particular {@code snapshotToFind} in the given snapshot list. + * @return a matching snapshot entry or null. + */ @Nullable - private static Entry findInList(Snapshot snapshot, List forRepo) { + private static Entry findSnapshotInList(Snapshot snapshotToFind, List forRepo) { for (Entry entry : forRepo) { - final Snapshot curr = entry.snapshot(); - if (curr.equals(snapshot)) { + final Snapshot snapshot = entry.snapshot(); + if (snapshot.equals(snapshotToFind)) { return entry; } } @@ -186,30 +196,41 @@ private static Entry findInList(Snapshot snapshot, List forRepo) { } /** - * Computes a map of repository shard id to set of generations, containing all shard generations that became obsolete and may be - * deleted from the repository as the cluster state moved from the given {@code old} value of {@link SnapshotsInProgress} to this - * instance. + * Computes a map of repository shard id to set of shard generations, containing all shard generations that became obsolete and may be + * deleted from the repository as the cluster state moves from the given old value of {@link SnapshotsInProgress} to this instance. + *

    + * An unique shard generation is created for every in-progress shard snapshot. The shard generation file contains information about all + * the files needed by pre-existing and any new shard snapshots that were in-progress. When a shard snapshot is finalized, its file list + * is promoted to the official shard snapshot list for the index shard. This final list will contain metadata about any other + * in-progress shard snapshots that were not yet finalized when it began. All these other in-progress shard snapshot lists are scheduled + * for deletion now. */ - public Map> obsoleteGenerations(String repository, SnapshotsInProgress old) { + public Map> obsoleteGenerations( + String repository, + SnapshotsInProgress oldClusterStateSnapshots + ) { final Map> obsoleteGenerations = new HashMap<>(); - final List updatedSnapshots = forRepo(repository); - for (Entry entry : old.forRepo(repository)) { - final Entry updatedEntry = findInList(entry.snapshot(), updatedSnapshots); - if (updatedEntry == null || updatedEntry == entry) { + final List latestSnapshots = forRepo(repository); + + for (Entry oldEntry : oldClusterStateSnapshots.forRepo(repository)) { + final Entry matchingLatestEntry = findSnapshotInList(oldEntry.snapshot(), latestSnapshots); + if (matchingLatestEntry == null || matchingLatestEntry == oldEntry) { + // The snapshot progress has not changed. continue; } - for (Map.Entry oldShardAssignment : entry.shardsByRepoShardId().entrySet()) { + for (Map.Entry oldShardAssignment : oldEntry.shardSnapshotStatusByRepoShardId() + .entrySet()) { final RepositoryShardId repositoryShardId = oldShardAssignment.getKey(); final ShardSnapshotStatus oldStatus = oldShardAssignment.getValue(); - final ShardSnapshotStatus newStatus = updatedEntry.shardsByRepoShardId().get(repositoryShardId); + final ShardSnapshotStatus newStatus = matchingLatestEntry.shardSnapshotStatusByRepoShardId().get(repositoryShardId); if (oldStatus.state == ShardState.SUCCESS && oldStatus.generation() != null && newStatus != null && newStatus.state() == ShardState.SUCCESS && newStatus.generation() != null && oldStatus.generation().equals(newStatus.generation()) == false) { - // We moved from a non-null generation successful generation to a different non-null successful generation - // so the original generation is clearly obsolete because it was in-flight before and is now unreferenced everywhere. + // We moved from a non-null successful generation to a different non-null successful generation + // so the original generation is obsolete because it was in-flight before and is now unreferenced. 
obsoleteGenerations.computeIfAbsent(repositoryShardId, ignored -> new HashSet<>()).add(oldStatus.generation()); logger.debug( """ @@ -218,7 +239,7 @@ public Map> obsoleteGenerations(String r """, oldStatus.generation(), newStatus.generation(), - entry.snapshot(), + oldEntry.snapshot(), repositoryShardId.shardId(), oldStatus.nodeId() ); @@ -399,7 +420,7 @@ private static boolean assertConsistentEntries(Map entries) { assert entriesForRepository.isEmpty() == false : "found empty list of snapshots for " + repository + " in " + entries; for (Entry entry : entriesForRepository) { assert entry.repository().equals(repository) : "mismatched repository " + entry + " tracked under " + repository; - for (Map.Entry shard : entry.shardsByRepoShardId().entrySet()) { + for (Map.Entry shard : entry.shardSnapshotStatusByRepoShardId().entrySet()) { final RepositoryShardId sid = shard.getKey(); final ShardSnapshotStatus shardSnapshotStatus = shard.getValue(); assert assertShardStateConsistent( @@ -520,11 +541,17 @@ public boolean nodeIdsForRemovalChanged(SnapshotsInProgress other) { return nodesIdsForRemoval.equals(other.nodesIdsForRemoval) == false; } + /** + * The current stage/phase of the shard snapshot, and whether it has completed or failed. + */ public enum ShardState { INIT((byte) 0, false, false), SUCCESS((byte) 2, true, false), FAILED((byte) 3, true, true), ABORTED((byte) 4, false, true), + /** + * Shard primary is unassigned and shard cannot be snapshotted. + */ MISSING((byte) 5, true, true), /** * Shard snapshot is waiting for the primary to snapshot to become available. @@ -611,6 +638,13 @@ public static State fromValue(byte value) { } } + /** + * @param nodeId node snapshotting the shard + * @param state the current phase of the snapshot + * @param generation shard generation ID identifying a particular snapshot of a shard + * @param reason what initiated the shard snapshot + * @param shardSnapshotResult only set if the snapshot has been successful, contains information for the shard finalization phase + */ public record ShardSnapshotStatus( @Nullable String nodeId, ShardState state, @@ -779,7 +813,7 @@ public static class Entry implements Writeable, ToXContentObject, RepositoryOper private final SnapshotId source; /** - * Map of {@link RepositoryShardId} to {@link ShardSnapshotStatus} tracking the state of each shard operation in this entry. + * Map of {@link RepositoryShardId} to {@link ShardSnapshotStatus} tracking the state of each shard operation in this snapshot. */ private final Map shardStatusByRepoShardId; @@ -1201,7 +1235,7 @@ public Entry withStartedShards(Map shards) { userMetadata, version ); - assert updated.state().completed() == false && completed(updated.shardsByRepoShardId().values()) == false + assert updated.state().completed() == false && completed(updated.shardSnapshotStatusByRepoShardId().values()) == false : "Only running snapshots allowed but saw [" + updated + "]"; return updated; } @@ -1215,7 +1249,10 @@ public Snapshot snapshot() { return this.snapshot; } - public Map shardsByRepoShardId() { + /** + * Returns a map of shards to their snapshot status. 
+ */ + public Map shardSnapshotStatusByRepoShardId() { return shardStatusByRepoShardId; } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java index 523dc0efd450b..8abb1c76da142 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java @@ -240,7 +240,7 @@ public boolean started() { } /** - * Returns true iff the this shard is currently relocating to + * Returns true iff this shard is currently relocating to * another node. Otherwise false * * @see ShardRoutingState#RELOCATING diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java index 113d3c8f28a19..b17545a4cbeb6 100644 --- a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java @@ -33,8 +33,8 @@ /** * Contains information about all snapshots for the given shard in repository *

    - * This class is used to find files that were already snapshotted and clear out files that no longer referenced by any - * snapshots. + * This class is used to find shard files that were already snapshotted and clear out shard files that are no longer referenced by any + * snapshots of the shard. */ public class BlobStoreIndexShardSnapshots implements Iterable, ToXContentFragment { @@ -48,6 +48,10 @@ private BlobStoreIndexShardSnapshots(Map files, List retainedSnapshots) { if (retainedSnapshots.isEmpty()) { return EMPTY; @@ -68,6 +72,10 @@ public BlobStoreIndexShardSnapshots withRetainedSnapshots(Set retain return new BlobStoreIndexShardSnapshots(newFiles, updatedSnapshots); } + /** + * Creates a new list of the shard's snapshots ({@link BlobStoreIndexShardSnapshots}) adding a new shard snapshot + * ({@link SnapshotFiles}). + */ public BlobStoreIndexShardSnapshots withAddedSnapshot(SnapshotFiles snapshotFiles) { Map updatedFiles = null; for (FileInfo fileInfo : snapshotFiles.indexFiles()) { diff --git a/server/src/main/java/org/elasticsearch/repositories/FinalizeSnapshotContext.java b/server/src/main/java/org/elasticsearch/repositories/FinalizeSnapshotContext.java index b459e1cfc7338..0e38c5722c116 100644 --- a/server/src/main/java/org/elasticsearch/repositories/FinalizeSnapshotContext.java +++ b/server/src/main/java/org/elasticsearch/repositories/FinalizeSnapshotContext.java @@ -99,8 +99,13 @@ public Map> obsoleteShardGenerations() { return obsoleteGenerations.get(); } + /** + * Returns a new {@link ClusterState}, based on the given {@code state} with the create-snapshot entry removed. + */ public ClusterState updatedClusterState(ClusterState state) { final ClusterState updatedState = SnapshotsService.stateWithoutSnapshot(state, snapshotInfo.snapshot(), updatedShardGenerations); + // Now that the updated cluster state may have changed in-progress shard snapshots' shard generations to the latest shard + // generation, let's mark any now unreferenced shard generations as obsolete and ready to be deleted. obsoleteGenerations.set( SnapshotsInProgress.get(updatedState).obsoleteGenerations(snapshotInfo.repository(), SnapshotsInProgress.get(state)) ); diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index a90b0a217285c..06a53053bca88 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -133,8 +133,8 @@ public void onFailure(Exception e) { IndexMetadata getSnapshotIndexMetaData(RepositoryData repositoryData, SnapshotId snapshotId, IndexId index) throws IOException; /** - * Returns a {@link RepositoryData} to describe the data in the repository, including the snapshots and the indices across all snapshots - * found in the repository. Completes the listener with a {@link RepositoryException} if there was an error in reading the data. + * Fetches the {@link RepositoryData} and passes it into the listener. May completes the listener with a {@link RepositoryException} if + * there is an error in reading the repository data. * * @param responseExecutor Executor to use to complete the listener if not using the calling thread. 
Using {@link * org.elasticsearch.common.util.concurrent.EsExecutors#DIRECT_EXECUTOR_SERVICE} means to complete the listener diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java b/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java index 17ac4ef38f1b6..c6494eca9823b 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java @@ -47,8 +47,7 @@ import java.util.stream.Collectors; /** - * A class that represents the data in a repository, as captured in the - * repository's index blob. + * Represents the data in a repository: the snapshots and the indices across all snapshots found in the repository. */ public final class RepositoryData { diff --git a/server/src/main/java/org/elasticsearch/repositories/ShardGeneration.java b/server/src/main/java/org/elasticsearch/repositories/ShardGeneration.java index 5bdd68b14762e..275bbdb3da45d 100644 --- a/server/src/main/java/org/elasticsearch/repositories/ShardGeneration.java +++ b/server/src/main/java/org/elasticsearch/repositories/ShardGeneration.java @@ -76,9 +76,9 @@ public void writeTo(StreamOutput out) throws IOException { } /** - * Convert to a {@link String} for use in naming the {@code index-$SHARD_GEN} blob containing a {@link BlobStoreIndexShardSnapshots}. + * For use in naming the {@code index-$SHARD_GEN} blob containing a {@link BlobStoreIndexShardSnapshots}. */ - public String toBlobNamePart() { + public String getGenerationUUID() { return rawGeneration; } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index a3b36f67b316b..96fcf0512cbff 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -598,7 +598,7 @@ public void cloneShardSnapshot( INDEX_SHARD_SNAPSHOTS_FORMAT.write( existingSnapshots.withClone(source.getName(), target.getName()), shardContainer, - newGen.toBlobNamePart(), + newGen.getGenerationUUID(), compress ); return new ShardSnapshotResult( @@ -1307,7 +1307,7 @@ private void deleteFromShardSnapshotMeta(BlobStoreIndexShardSnapshots updatedSna INDEX_SHARD_SNAPSHOTS_FORMAT.write( updatedSnapshots, shardContainer, - writtenGeneration.toBlobNamePart(), + writtenGeneration.getGenerationUUID(), compress ); } else { @@ -1330,7 +1330,7 @@ private void deleteFromShardSnapshotMeta(BlobStoreIndexShardSnapshots updatedSna "Failed to finalize snapshot deletion " + snapshotIds + " with shard index [" - + INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(writtenGeneration.toBlobNamePart()) + + INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(writtenGeneration.getGenerationUUID()) + "]", e ); @@ -1876,7 +1876,7 @@ private void cleanupOldMetadata( (indexId, gens) -> gens.forEach( (shardId, oldGen) -> toDelete.add( shardPath(indexId, shardId).buildAsString().substring(prefixPathLen) + INDEX_FILE_PREFIX + oldGen - .toBlobNamePart() + .getGenerationUUID() ) ) ); @@ -1937,7 +1937,7 @@ public void getSnapshotInfo( } /** - * Tries to poll a {@link SnapshotId} to load {@link SnapshotInfo} for from the given {@code queue}. + * Tries to poll a {@link SnapshotId} to load {@link SnapshotInfo} from the given {@code queue}. 
*/ private void getOneSnapshotInfo(BlockingQueue queue, GetSnapshotInfoContext context) { final SnapshotId snapshotId = queue.poll(); @@ -3289,7 +3289,7 @@ private void doSnapshotShard(SnapshotShardContext context) { INDEX_SHARD_SNAPSHOTS_FORMAT.write( updatedBlobStoreIndexShardSnapshots, shardContainer, - indexGeneration.toBlobNamePart(), + indexGeneration.getGenerationUUID(), compress, serializationParams ); @@ -3300,7 +3300,7 @@ private void doSnapshotShard(SnapshotShardContext context) { "Failed to write shard level snapshot metadata for [" + snapshotId + "] to [" - + INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(indexGeneration.toBlobNamePart()) + + INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(indexGeneration.getGenerationUUID()) + "]", e ); @@ -3310,7 +3310,7 @@ private void doSnapshotShard(SnapshotShardContext context) { // When not using shard generations we can only write the index-${N} blob after all other work for this shard has // completed. // Also, in case of numeric shard generations the data node has to take care of deleting old shard generations. - final long newGen = Long.parseLong(fileListGeneration.toBlobNamePart()) + 1; + final long newGen = Long.parseLong(fileListGeneration.getGenerationUUID()) + 1; indexGeneration = new ShardGeneration(newGen); // Delete all previous index-N blobs final List blobsToDelete = blobs.stream().filter(blob -> blob.startsWith(SNAPSHOT_INDEX_PREFIX)).toList(); @@ -3338,7 +3338,7 @@ private void doSnapshotShard(SnapshotShardContext context) { "Failed to finalize snapshot creation [" + snapshotId + "] with shard index [" - + INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(indexGeneration.toBlobNamePart()) + + INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(indexGeneration.getGenerationUUID()) + "]", e ); @@ -3826,7 +3826,7 @@ private Tuple buildBlobStoreIndex return new Tuple<>(BlobStoreIndexShardSnapshots.EMPTY, ShardGenerations.NEW_SHARD_GEN); } return new Tuple<>( - INDEX_SHARD_SNAPSHOTS_FORMAT.read(metadata.name(), shardContainer, generation.toBlobNamePart(), namedXContentRegistry), + INDEX_SHARD_SNAPSHOTS_FORMAT.read(metadata.name(), shardContainer, generation.getGenerationUUID(), namedXContentRegistry), generation ); } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/GetSnapshotInfoContext.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/GetSnapshotInfoContext.java index 96782bca31a15..3338a3c2e2a76 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/GetSnapshotInfoContext.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/GetSnapshotInfoContext.java @@ -22,7 +22,7 @@ import java.util.function.BooleanSupplier; /** - * Describes the context of fetching one or more {@link SnapshotInfo} via {@link Repository#getSnapshotInfo}. + * A context through which a consumer can act on one or more {@link SnapshotInfo} via {@link Repository#getSnapshotInfo}. 
*/ final class GetSnapshotInfoContext implements ActionListener { diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/package-info.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/package-info.java index 48d8a0730f48c..5bc09e4653d16 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/package-info.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/package-info.java @@ -59,8 +59,8 @@ * | | |- snap-20131011.dat - SMILE serialized {@link org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot} for * | | | snapshot "20131011" * | | |- index-123 - SMILE serialized {@link org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshots} for - * | | | the shard (files with numeric suffixes were created by older versions, newer ES versions use a uuid - * | | | suffix instead) + * | | | the shard. The suffix is the {@link org.elasticsearch.repositories.ShardGeneration } (files with + * | | | numeric suffixes were created by older versions, newer ES versions use a uuid suffix instead) * | | * | |- 1/ - data for shard "1" of index "foo" * | | |- __1 @@ -158,20 +158,23 @@ * *

 * <ol>
 *     <li>Create the {@link org.apache.lucene.index.IndexCommit} for the shard to snapshot.</li>
- *     <li>Get the {@link org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshots} blob
- *     with name {@code index-${uuid}} with the {@code uuid} generation returned by
- *     {@link org.elasticsearch.repositories.ShardGenerations#getShardGen} to get the information of what segment files are
- *     already available in the blobstore.</li>
- *     <li>By comparing the files in the {@code IndexCommit} and the available file list from the previous step, determine the segment files
- *     that need to be written to the blob store. For each segment that needs to be added to the blob store, generate a unique name by combining
- *     the segment data blob prefix {@code __} and a UUID and write the segment to the blobstore.</li>
- *     <li>After completing all segment writes, a blob containing a
+ *     <li>Get the current {@link org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshots} blob file with name
+ *     {@code index-${uuid}} by loading the index shard's generation {@code uuid} from {@link org.elasticsearch.repositories.ShardGenerations}
+ *     (via {@link org.elasticsearch.repositories.ShardGenerations#getShardGen}). This blob file will list what segment files are already
+ *     available in the blobstore.</li>
+ *     <li>By comparing the files in the {@code IndexCommit} and the available file list from the previous step's blob file, determine the new
+ *     segment files that need to be written to the blob store. For each segment that needs to be added to the blob store, generate a unique
+ *     name by combining the segment data blob prefix {@code __} and a new UUID and write the segment to the blobstore.</li>
+ *     <li>After completing all segment writes, a new blob file containing the new shard snapshot's
 *     {@link org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot} with name {@code snap-${snapshot-uuid}.dat} is written to
 *     the shard's path and contains a list of all the files referenced by the snapshot as well as some metadata about the snapshot. See the
 *     documentation of {@code BlobStoreIndexShardSnapshot} for details on its contents.</li>
 *     <li>Once all the segments and the {@code BlobStoreIndexShardSnapshot} blob have been written, an updated
 *     {@code BlobStoreIndexShardSnapshots} blob is written to the shard's path with name {@code index-${newUUID}}.</li>
 * </ol>
 *
+ * At this point, all of the necessary shard data and shard metadata for the new shard snapshot have been written to the repository, but the
+ * metadata outside of the shard directory has not been updated to point to the new shard snapshot as the latest. The next finalization step
+ * will handle updates external to the index shard directory, and add references in the root directory.
 *
 * <h2>Finalizing the Snapshot</h2>
 *
@@ -180,10 +183,10 @@
 * following actions in order:
 * <ol>
 *     <li>Write a blob containing the cluster metadata to the root of the blob store repository at {@code /meta-${snapshot-uuid}.dat}</li>
- *     <li>Write the metadata for each index to a blob in that index's directory at
+ *     <li>Write the metadata for the index to a blob in that index's directory at
 *     {@code /indices/${index-snapshot-uuid}/meta-${snapshot-uuid}.dat}</li>
- *     <li>Write the {@link org.elasticsearch.snapshots.SnapshotInfo} blob for the given snapshot to the key {@code /snap-${snapshot-uuid}.dat}
- *     directly under the repository root.</li>
+ *     <li>Write the {@link org.elasticsearch.snapshots.SnapshotInfo} blob for the given snapshot in a new blob file
+ *     {@code /snap-${snapshot-uuid}.dat} directly under the repository root.</li>
 *     <li>Write an updated {@code RepositoryData} blob containing the new snapshot.</li>
 * </ol>
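The index-${uuid} shard generation blob described in the steps above can be read back through the repository's shard snapshots format; a minimal sketch, patterned on the readShardGeneration helper in the CloneSnapshotIT change earlier in this patch, is shown below. The ShardGenerationReadExample wrapper is hypothetical, while the format constant, the shardContainer call and the getGenerationUUID() accessor all appear in the diffs of this patch.

    import java.io.IOException;

    import org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshots;
    import org.elasticsearch.repositories.RepositoryShardId;
    import org.elasticsearch.repositories.ShardGeneration;
    import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
    import org.elasticsearch.xcontent.NamedXContentRegistry;

    // Hypothetical helper: loads the BlobStoreIndexShardSnapshots blob (index-${uuid}) for one
    // shard, where ${uuid} comes from the accessor renamed by this patch from toBlobNamePart()
    // to getGenerationUUID().
    class ShardGenerationReadExample {
        static BlobStoreIndexShardSnapshots readShardGeneration(
            BlobStoreRepository repository,
            RepositoryShardId shardId,
            ShardGeneration generation
        ) throws IOException {
            return BlobStoreRepository.INDEX_SHARD_SNAPSHOTS_FORMAT.read(
                repository.getMetadata().name(),
                repository.shardContainer(shardId.index(), shardId.shardId()),
                generation.getGenerationUUID(),
                NamedXContentRegistry.EMPTY
            );
        }
    }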
    * diff --git a/server/src/main/java/org/elasticsearch/snapshots/InFlightShardSnapshotStates.java b/server/src/main/java/org/elasticsearch/snapshots/InFlightShardSnapshotStates.java index 82872ac423252..f80ed0bc6f7e4 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/InFlightShardSnapshotStates.java +++ b/server/src/main/java/org/elasticsearch/snapshots/InFlightShardSnapshotStates.java @@ -48,7 +48,8 @@ public static InFlightShardSnapshotStates forEntries(List shard : runningSnapshot.shardsByRepoShardId() + for (Map.Entry shard : runningSnapshot + .shardSnapshotStatusByRepoShardId() .entrySet()) { final RepositoryShardId sid = shard.getKey(); addStateInformation(generations, busyIds, shard.getValue(), sid.shardId(), sid.indexName()); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotInfo.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotInfo.java index 1a022d08d3a24..286b08a0d3f3c 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotInfo.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotInfo.java @@ -177,14 +177,15 @@ public SnapshotInfo( public static SnapshotInfo inProgress(SnapshotsInProgress.Entry entry) { int successfulShards = 0; List shardFailures = new ArrayList<>(); - for (Map.Entry c : entry.shardsByRepoShardId().entrySet()) { + for (Map.Entry c : entry.shardSnapshotStatusByRepoShardId() + .entrySet()) { if (c.getValue().state() == SnapshotsInProgress.ShardState.SUCCESS) { successfulShards++; } else if (c.getValue().state().failed() && c.getValue().state().completed()) { shardFailures.add(new SnapshotShardFailure(c.getValue().nodeId(), entry.shardId(c.getKey()), c.getValue().reason())); } } - int totalShards = entry.shardsByRepoShardId().size(); + int totalShards = entry.shardSnapshotStatusByRepoShardId().size(); return new SnapshotInfo( entry.snapshot(), List.copyOf(entry.indices().keySet()), diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java index 1529ef556037a..ef8840c90be0a 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java @@ -20,7 +20,6 @@ import org.elasticsearch.cluster.SnapshotsInProgress; import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus; import org.elasticsearch.cluster.SnapshotsInProgress.ShardState; -import org.elasticsearch.cluster.SnapshotsInProgress.State; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Strings; @@ -67,6 +66,7 @@ * This service runs on data nodes and controls currently running shard snapshots on these nodes. It is responsible for * starting and stopping shard level snapshots. * See package level documentation of {@link org.elasticsearch.snapshots} for details. + * See {@link SnapshotsService} for the master node snapshotting steps. */ public final class SnapshotShardsService extends AbstractLifecycleComponent implements ClusterStateListener, IndexEventListener { private static final Logger logger = LogManager.getLogger(SnapshotShardsService.class); @@ -205,6 +205,9 @@ public Map currentSnapshotShards(Snapsho } } + /** + * Cancels any snapshots that have been removed from the given list of SnapshotsInProgress. 
+ */ private void cancelRemoved(SnapshotsInProgress snapshotsInProgress) { // First, remove snapshots that are no longer there Iterator>> it = shardSnapshots.entrySet().iterator(); @@ -250,7 +253,7 @@ private void handleUpdatedSnapshotsInProgressEntry(String localNodeId, boolean r // Abort all running shards for this snapshot final Snapshot snapshot = entry.snapshot(); Map snapshotShards = shardSnapshots.getOrDefault(snapshot, emptyMap()); - for (Map.Entry shard : entry.shardsByRepoShardId().entrySet()) { + for (Map.Entry shard : entry.shardSnapshotStatusByRepoShardId().entrySet()) { final ShardId sid = entry.shardId(shard.getKey()); final IndexShardSnapshotStatus snapshotStatus = snapshotShards.get(sid); if (snapshotStatus == null) { @@ -561,7 +564,7 @@ public static String getShardStateId(IndexShard indexShard, IndexCommit snapshot */ private void syncShardStatsOnNewMaster(List entries) { for (SnapshotsInProgress.Entry snapshot : entries) { - if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) { + if (snapshot.state() == SnapshotsInProgress.State.STARTED || snapshot.state() == SnapshotsInProgress.State.ABORTED) { final Map localShards; synchronized (shardSnapshots) { final var currentLocalShards = shardSnapshots.get(snapshot.snapshot()); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 9178050ff2a0b..75b5a4e6a2ea6 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -38,7 +38,6 @@ import org.elasticsearch.cluster.SnapshotsInProgress; import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus; import org.elasticsearch.cluster.SnapshotsInProgress.ShardState; -import org.elasticsearch.cluster.SnapshotsInProgress.State; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; import org.elasticsearch.cluster.metadata.DataStream; @@ -134,6 +133,7 @@ * Service responsible for creating snapshots. This service runs all the steps executed on the master node during snapshot creation and * deletion. * See package level documentation of {@link org.elasticsearch.snapshots} for details. + * See {@link SnapshotShardsService} for the data node snapshotting steps. */ public final class SnapshotsService extends AbstractLifecycleComponent implements ClusterStateApplier { @@ -179,7 +179,7 @@ public final class SnapshotsService extends AbstractLifecycleComponent implement // Set of snapshots that are currently being ended by this node private final Set endingSnapshots = Collections.synchronizedSet(new HashSet<>()); - // Set of currently initializing clone operations + /** Set of currently initializing clone operations */ private final Set initializingClones = Collections.synchronizedSet(new HashSet<>()); private final UpdateSnapshotStatusAction updateSnapshotStatusHandler; @@ -288,6 +288,9 @@ public void createSnapshot(final CreateSnapshotRequest request, final ActionList submitCreateSnapshotRequest(request, listener, repository, new Snapshot(repositoryName, snapshotId), repository.getMetadata()); } + /** + * Updates the cluster state with the new {@link CreateSnapshotRequest}, which triggers async snapshot creation. 
+ */ private void submitCreateSnapshotRequest( CreateSnapshotRequest request, ActionListener listener, @@ -408,6 +411,9 @@ public void clusterStateProcessed(ClusterState oldState, final ClusterState newS }, "clone_snapshot [" + request.source() + "][" + snapshotName + ']', listener::onFailure); } + /** + * Checks the cluster state for any in-progress repository cleanup tasks ({@link RepositoryCleanupInProgress}). + */ private static void ensureNoCleanupInProgress( final ClusterState currentState, final String repositoryName, @@ -570,7 +576,8 @@ public void clusterStateProcessed(ClusterState oldState, ClusterState newState) if (updatedEntry != null) { final Snapshot target = updatedEntry.snapshot(); final SnapshotId sourceSnapshot = updatedEntry.source(); - for (Map.Entry indexClone : updatedEntry.shardsByRepoShardId().entrySet()) { + for (Map.Entry indexClone : updatedEntry.shardSnapshotStatusByRepoShardId() + .entrySet()) { final ShardSnapshotStatus shardStatusBefore = indexClone.getValue(); if (shardStatusBefore.state() != ShardState.INIT) { continue; @@ -579,7 +586,7 @@ public void clusterStateProcessed(ClusterState oldState, ClusterState newState) runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository); } } else { - // Extremely unlikely corner case of master failing over between between starting the clone and + // Extremely unlikely corner case of master failing over between starting the clone and // starting shard clones. logger.warn("Did not find expected entry [{}] in the cluster state", cloneEntry); } @@ -739,9 +746,9 @@ private static void validate(final String repositoryName, final String snapshotN private static ShardGenerations buildGenerations(SnapshotsInProgress.Entry snapshot, Metadata metadata) { ShardGenerations.Builder builder = ShardGenerations.builder(); if (snapshot.isClone()) { - snapshot.shardsByRepoShardId().forEach((key, value) -> builder.put(key.index(), key.shardId(), value)); + snapshot.shardSnapshotStatusByRepoShardId().forEach((key, value) -> builder.put(key.index(), key.shardId(), value)); } else { - snapshot.shardsByRepoShardId().forEach((key, value) -> { + snapshot.shardSnapshotStatusByRepoShardId().forEach((key, value) -> { final Index index = snapshot.indexByName(key.indexName()); if (metadata.index(index) == null) { assert snapshot.partial() : "Index [" + index + "] was deleted during a snapshot but snapshot was not partial."; @@ -936,7 +943,7 @@ private static boolean assertNoDanglingSnapshots(ClusterState state) { .collect(Collectors.toSet()); for (List repoEntry : snapshotsInProgress.entriesByRepo()) { final SnapshotsInProgress.Entry entry = repoEntry.get(0); - for (ShardSnapshotStatus value : entry.shardsByRepoShardId().values()) { + for (ShardSnapshotStatus value : entry.shardSnapshotStatusByRepoShardId().values()) { if (value.equals(ShardSnapshotStatus.UNASSIGNED_QUEUED)) { assert reposWithRunningDelete.contains(entry.repository()) : "Found shard snapshot waiting to be assigned in [" + entry + "] but it is not blocked by any running delete"; @@ -981,19 +988,18 @@ private void processExternalChanges(boolean changedNodes, boolean changedShards) @Override public ClusterState execute(ClusterState currentState) { RoutingTable routingTable = currentState.routingTable(); - final SnapshotsInProgress snapshots = SnapshotsInProgress.get(currentState); - final SnapshotDeletionsInProgress deletes = SnapshotDeletionsInProgress.get(currentState); + final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(currentState); + 
final SnapshotDeletionsInProgress deletesInProgress = SnapshotDeletionsInProgress.get(currentState); DiscoveryNodes nodes = currentState.nodes(); - final EnumSet statesToUpdate; - // If we are reacting to a change in the cluster node configuration we have to update the shard states of both started - // and - // aborted snapshots to potentially fail shards running on the removed nodes + final EnumSet statesToUpdate; if (changedNodes) { - statesToUpdate = EnumSet.of(State.STARTED, State.ABORTED); + // If we are reacting to a change in the cluster node configuration we have to update the shard states of both started + // and aborted snapshots to potentially fail shards running on the removed nodes + statesToUpdate = EnumSet.of(SnapshotsInProgress.State.STARTED, SnapshotsInProgress.State.ABORTED); } else { // We are reacting to shards that started only so which only affects the individual shard states of started // snapshots - statesToUpdate = EnumSet.of(State.STARTED); + statesToUpdate = EnumSet.of(SnapshotsInProgress.State.STARTED); } // We keep a cache of shards that failed in this map. If we fail a shardId for a given repository because of @@ -1003,9 +1009,9 @@ public ClusterState execute(ClusterState currentState) { // TODO: the code in this state update duplicates large chunks of the logic in #SHARD_STATE_EXECUTOR. // We should refactor it to ideally also go through #SHARD_STATE_EXECUTOR by hand-crafting shard state updates // that encapsulate nodes leaving or indices having been deleted and passing them to the executor instead. - SnapshotsInProgress updatedSnapshots = snapshots; + SnapshotsInProgress updatedSnapshots = snapshotsInProgress; - for (final List snapshotsInRepo : snapshots.entriesByRepo()) { + for (final List snapshotsInRepo : snapshotsInProgress.entriesByRepo()) { boolean changed = false; final List updatedEntriesForRepo = new ArrayList<>(); final Map knownFailures = new HashMap<>(); @@ -1013,17 +1019,17 @@ public ClusterState execute(ClusterState currentState) { for (SnapshotsInProgress.Entry snapshotEntry : snapshotsInRepo) { if (statesToUpdate.contains(snapshotEntry.state())) { if (snapshotEntry.isClone()) { - if (snapshotEntry.shardsByRepoShardId().isEmpty()) { + if (snapshotEntry.shardSnapshotStatusByRepoShardId().isEmpty()) { // Currently initializing clone if (initializingClones.contains(snapshotEntry.snapshot())) { updatedEntriesForRepo.add(snapshotEntry); } else { - logger.debug("removing not yet start clone operation [{}]", snapshotEntry); + logger.debug("removing not yet started clone operation [{}]", snapshotEntry); changed = true; } } else { // see if any clones may have had a shard become available for execution because of failures - if (deletes.hasExecutingDeletion(repositoryName)) { + if (deletesInProgress.hasExecutingDeletion(repositoryName)) { // Currently executing a delete for this repo, no need to try and update any clone operations. // The logic for finishing the delete will update running clones with the latest changes. 
updatedEntriesForRepo.add(snapshotEntry); @@ -1033,7 +1039,7 @@ public ClusterState execute(ClusterState currentState) { InFlightShardSnapshotStates inFlightShardSnapshotStates = null; for (Map.Entry failureEntry : knownFailures.entrySet()) { final RepositoryShardId repositoryShardId = failureEntry.getKey(); - final ShardSnapshotStatus existingStatus = snapshotEntry.shardsByRepoShardId() + final ShardSnapshotStatus existingStatus = snapshotEntry.shardSnapshotStatusByRepoShardId() .get(repositoryShardId); if (ShardSnapshotStatus.UNASSIGNED_QUEUED.equals(existingStatus)) { if (inFlightShardSnapshotStates == null) { @@ -1047,7 +1053,7 @@ public ClusterState execute(ClusterState currentState) { continue; } if (clones == null) { - clones = ImmutableOpenMap.builder(snapshotEntry.shardsByRepoShardId()); + clones = ImmutableOpenMap.builder(snapshotEntry.shardSnapshotStatusByRepoShardId()); } // We can use the generation from the shard failure to start the clone operation here // because #processWaitingShardsAndRemovedNodes adds generations to failure statuses that @@ -1075,7 +1081,7 @@ public ClusterState execute(ClusterState currentState) { snapshotEntry, routingTable, nodes, - snapshots::isNodeIdForRemoval, + snapshotsInProgress::isNodeIdForRemoval, knownFailures ); if (shards != null) { @@ -1098,7 +1104,7 @@ public ClusterState execute(ClusterState currentState) { } else { // Now we're down to completed or un-modified snapshots - if (snapshotEntry.state().completed() || completed(snapshotEntry.shardsByRepoShardId().values())) { + if (snapshotEntry.state().completed() || completed(snapshotEntry.shardSnapshotStatusByRepoShardId().values())) { finishedSnapshots.add(snapshotEntry); } updatedEntriesForRepo.add(snapshotEntry); @@ -1109,7 +1115,7 @@ public ClusterState execute(ClusterState currentState) { } } final ClusterState res = readyDeletions( - updatedSnapshots != snapshots + updatedSnapshots != snapshotsInProgress ? 
ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, updatedSnapshots).build() : currentState ).v1(); @@ -1176,7 +1182,8 @@ private static ImmutableOpenMap processWaitingShar assert snapshotEntry.isClone() == false : "clones take a different path"; boolean snapshotChanged = false; ImmutableOpenMap.Builder shards = ImmutableOpenMap.builder(); - for (Map.Entry shardSnapshotEntry : snapshotEntry.shardsByRepoShardId().entrySet()) { + for (Map.Entry shardSnapshotEntry : snapshotEntry.shardSnapshotStatusByRepoShardId() + .entrySet()) { ShardSnapshotStatus shardStatus = shardSnapshotEntry.getValue(); ShardId shardId = snapshotEntry.shardId(shardSnapshotEntry.getKey()); if (shardStatus.equals(ShardSnapshotStatus.UNASSIGNED_QUEUED)) { @@ -1239,7 +1246,7 @@ private static ImmutableOpenMap processWaitingShar } // Shard that we were waiting for went into unassigned state or disappeared (index or shard is gone) - giving up snapshotChanged = true; - logger.warn("failing snapshot of shard [{}] on unassigned shard [{}]", shardId, shardStatus.nodeId()); + logger.warn("failing snapshot of shard [{}] on node [{}] because shard is unassigned", shardId, shardStatus.nodeId()); final ShardSnapshotStatus failedState = new ShardSnapshotStatus( shardStatus.nodeId(), ShardState.FAILED, @@ -1278,8 +1285,9 @@ private static ImmutableOpenMap processWaitingShar private static boolean waitingShardsStartedOrUnassigned(SnapshotsInProgress snapshotsInProgress, ClusterChangedEvent event) { for (List entries : snapshotsInProgress.entriesByRepo()) { for (SnapshotsInProgress.Entry entry : entries) { - if (entry.state() == State.STARTED && entry.isClone() == false) { - for (Map.Entry shardStatus : entry.shardsByRepoShardId().entrySet()) { + if (entry.state() == SnapshotsInProgress.State.STARTED && entry.isClone() == false) { + for (Map.Entry shardStatus : entry.shardSnapshotStatusByRepoShardId() + .entrySet()) { final ShardState state = shardStatus.getValue().state(); if (state != ShardState.WAITING && state != ShardState.QUEUED && state != ShardState.PAUSED_FOR_NODE_REMOVAL) { continue; @@ -1317,7 +1325,7 @@ private static boolean removedNodesCleanupNeeded(SnapshotsInProgress snapshotsIn // nothing to do for already completed snapshots or clones that run on master anyways return false; } - for (ShardSnapshotStatus shardSnapshotStatus : snapshot.shardsByRepoShardId().values()) { + for (ShardSnapshotStatus shardSnapshotStatus : snapshot.shardSnapshotStatusByRepoShardId().values()) { if (shardSnapshotStatus.state().completed() == false && removedNodeIds.contains(shardSnapshotStatus.nodeId())) { // Snapshot had an incomplete shard running on a removed node so we need to adjust that shard's snapshot status return true; @@ -1335,7 +1343,7 @@ private static boolean removedNodesCleanupNeeded(SnapshotsInProgress snapshotsIn private void endSnapshot(SnapshotsInProgress.Entry entry, Metadata metadata, @Nullable RepositoryData repositoryData) { final Snapshot snapshot = entry.snapshot(); final boolean newFinalization = endingSnapshots.add(snapshot); - if (entry.isClone() && entry.state() == State.FAILED) { + if (entry.isClone() && entry.state() == SnapshotsInProgress.State.FAILED) { logger.debug("Removing failed snapshot clone [{}] from cluster state", entry); if (newFinalization) { removeFailedSnapshotFromClusterState( @@ -1415,7 +1423,7 @@ private void finalizeSnapshotEntry(Snapshot snapshot, Metadata metadata, Reposit final List finalIndices = shardGenerations.indices().stream().map(IndexId::getName).toList(); final 
Set indexNames = new HashSet<>(finalIndices); ArrayList shardFailures = new ArrayList<>(); - for (Map.Entry shardStatus : entry.shardsByRepoShardId().entrySet()) { + for (Map.Entry shardStatus : entry.shardSnapshotStatusByRepoShardId().entrySet()) { RepositoryShardId shardId = shardStatus.getKey(); if (indexNames.contains(shardId.indexName()) == false) { assert entry.partial() : "only ignoring shard failures for concurrently deleted indices for partial snapshots"; @@ -1467,7 +1475,7 @@ private void finalizeSnapshotEntry(Snapshot snapshot, Metadata metadata, Reposit final Map indexSnapshotDetails = Maps.newMapWithExpectedSize( finalIndices.size() ); - for (Map.Entry shardEntry : entry.shardsByRepoShardId().entrySet()) { + for (Map.Entry shardEntry : entry.shardSnapshotStatusByRepoShardId().entrySet()) { indexSnapshotDetails.compute(shardEntry.getKey().indexName(), (indexName, current) -> { if (current == SnapshotInfo.IndexSnapshotDetails.SKIPPED) { // already found an unsuccessful shard in this index, skip this shard @@ -1506,7 +1514,7 @@ private void finalizeSnapshotEntry(Snapshot snapshot, Metadata metadata, Reposit entry.partial() ? onlySuccessfulFeatureStates(entry, finalIndices) : entry.featureStates(), failure, threadPool.absoluteTimeInMillis(), - entry.partial() ? shardGenerations.totalShards() : entry.shardsByRepoShardId().size(), + entry.partial() ? shardGenerations.totalShards() : entry.shardSnapshotStatusByRepoShardId().size(), shardFailures, entry.includeGlobalState(), entry.userMetadata(), @@ -1579,7 +1587,7 @@ private static List onlySuccessfulFeatureStates(SnapshotsIn // Figure out which indices have unsuccessful shards Set indicesWithUnsuccessfulShards = new HashSet<>(); - entry.shardsByRepoShardId().forEach((key, value) -> { + entry.shardSnapshotStatusByRepoShardId().forEach((key, value) -> { final ShardState shardState = value.state(); if (shardState.failed() || shardState.completed() == false) { indicesWithUnsuccessfulShards.add(key.indexName()); @@ -1749,16 +1757,21 @@ private static Tuple> read * Computes the cluster state resulting from removing a given snapshot create operation from the given state. This method will update * the shard generations of snapshots that the given snapshot depended on so that finalizing them will not cause rolling back to an * outdated shard generation. + *
    + * For example, shard snapshot X can be taken, but not finalized yet. Shard snapshot Y can then depend upon shard snapshot X. Then shard + * snapshot Y may finalize before shard snapshot X, but including X. However, X does not include Y. Therefore we update X to use Y's + * shard generation file (list of snapshots and dependencies) to avoid overwriting with X's file that is missing Y. * * @param state current cluster state * @param snapshot snapshot for which to remove the snapshot operation * @return updated cluster state */ public static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot snapshot, ShardGenerations shardGenerations) { - final SnapshotsInProgress snapshots = SnapshotsInProgress.get(state); + final SnapshotsInProgress inProgressSnapshots = SnapshotsInProgress.get(state); ClusterState result = state; int indexOfEntry = -1; - final List entryList = snapshots.forRepo(snapshot.getRepository()); + // Find the in-progress snapshot entry that matches {@code snapshot}. + final List entryList = inProgressSnapshots.forRepo(snapshot.getRepository()); for (int i = 0; i < entryList.size(); i++) { SnapshotsInProgress.Entry entry = entryList.get(i); if (entry.snapshot().equals(snapshot)) { @@ -1767,14 +1780,15 @@ public static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot sna } } if (indexOfEntry >= 0) { - final List entries = new ArrayList<>(entryList.size() - 1); + final List updatedEntries = new ArrayList<>(entryList.size() - 1); final SnapshotsInProgress.Entry removedEntry = entryList.get(indexOfEntry); for (int i = 0; i < indexOfEntry; i++) { final SnapshotsInProgress.Entry previousEntry = entryList.get(i); if (removedEntry.isClone()) { if (previousEntry.isClone()) { ImmutableOpenMap.Builder updatedShardAssignments = null; - for (Map.Entry finishedShardEntry : removedEntry.shardsByRepoShardId() + for (Map.Entry finishedShardEntry : removedEntry + .shardSnapshotStatusByRepoShardId() .entrySet()) { final ShardSnapshotStatus shardState = finishedShardEntry.getValue(); if (shardState.state() == ShardState.SUCCESS) { @@ -1782,19 +1796,20 @@ public static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot sna updatedShardAssignments, shardState, finishedShardEntry.getKey(), - previousEntry.shardsByRepoShardId() + previousEntry.shardSnapshotStatusByRepoShardId() ); } } - addCloneEntry(entries, previousEntry, updatedShardAssignments); + addCloneEntry(updatedEntries, previousEntry, updatedShardAssignments); } else { ImmutableOpenMap.Builder updatedShardAssignments = null; - for (Map.Entry finishedShardEntry : removedEntry.shardsByRepoShardId() + for (Map.Entry finishedShardEntry : removedEntry + .shardSnapshotStatusByRepoShardId() .entrySet()) { final ShardSnapshotStatus shardState = finishedShardEntry.getValue(); final RepositoryShardId repositoryShardId = finishedShardEntry.getKey(); if (shardState.state() != ShardState.SUCCESS - || previousEntry.shardsByRepoShardId().containsKey(repositoryShardId) == false) { + || previousEntry.shardSnapshotStatusByRepoShardId().containsKey(repositoryShardId) == false) { continue; } updatedShardAssignments = maybeAddUpdatedAssignment( @@ -1805,17 +1820,18 @@ public static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot sna ); } - addSnapshotEntry(entries, previousEntry, updatedShardAssignments); + addSnapshotEntry(updatedEntries, previousEntry, updatedShardAssignments); } } else { if (previousEntry.isClone()) { ImmutableOpenMap.Builder updatedShardAssignments = null; - for (Map.Entry 
finishedShardEntry : removedEntry.shardsByRepoShardId() + for (Map.Entry finishedShardEntry : removedEntry + .shardSnapshotStatusByRepoShardId() .entrySet()) { final ShardSnapshotStatus shardState = finishedShardEntry.getValue(); final RepositoryShardId repositoryShardId = finishedShardEntry.getKey(); if (shardState.state() != ShardState.SUCCESS - || previousEntry.shardsByRepoShardId().containsKey(repositoryShardId) == false + || previousEntry.shardSnapshotStatusByRepoShardId().containsKey(repositoryShardId) == false || shardGenerations.hasShardGen(finishedShardEntry.getKey()) == false) { continue; } @@ -1823,17 +1839,18 @@ public static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot sna updatedShardAssignments, shardState, repositoryShardId, - previousEntry.shardsByRepoShardId() + previousEntry.shardSnapshotStatusByRepoShardId() ); } - addCloneEntry(entries, previousEntry, updatedShardAssignments); + addCloneEntry(updatedEntries, previousEntry, updatedShardAssignments); } else { ImmutableOpenMap.Builder updatedShardAssignments = null; - for (Map.Entry finishedShardEntry : removedEntry.shardsByRepoShardId() + for (Map.Entry finishedShardEntry : removedEntry + .shardSnapshotStatusByRepoShardId() .entrySet()) { final ShardSnapshotStatus shardState = finishedShardEntry.getValue(); if (shardState.state() == ShardState.SUCCESS - && previousEntry.shardsByRepoShardId().containsKey(finishedShardEntry.getKey()) + && previousEntry.shardSnapshotStatusByRepoShardId().containsKey(finishedShardEntry.getKey()) && shardGenerations.hasShardGen(finishedShardEntry.getKey())) { updatedShardAssignments = maybeAddUpdatedAssignment( updatedShardAssignments, @@ -1843,15 +1860,18 @@ public static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot sna ); } } - addSnapshotEntry(entries, previousEntry, updatedShardAssignments); + addSnapshotEntry(updatedEntries, previousEntry, updatedShardAssignments); } } } for (int i = indexOfEntry + 1; i < entryList.size(); i++) { - entries.add(entryList.get(i)); + updatedEntries.add(entryList.get(i)); } result = ClusterState.builder(state) - .putCustom(SnapshotsInProgress.TYPE, snapshots.withUpdatedEntriesForRepo(snapshot.getRepository(), entries)) + .putCustom( + SnapshotsInProgress.TYPE, + inProgressSnapshots.withUpdatedEntriesForRepo(snapshot.getRepository(), updatedEntries) + ) .build(); } return readyDeletions(result).v1(); @@ -1880,7 +1900,7 @@ private static void addCloneEntry( entries.add(entryToUpdate); } else { final ImmutableOpenMap.Builder updatedStatus = ImmutableOpenMap.builder( - entryToUpdate.shardsByRepoShardId() + entryToUpdate.shardSnapshotStatusByRepoShardId() ); updatedStatus.putAllFromMap(updatedShardAssignments.build()); entries.add(entryToUpdate.withClones(updatedStatus.build())); @@ -2123,7 +2143,7 @@ public ClusterState execute(ClusterState currentState) { final SnapshotsInProgress updatedSnapshots = snapshotsInProgress.withUpdatedEntriesForRepo( repositoryName, snapshotsInProgress.forRepo(repositoryName).stream().map(existing -> { - if (existing.state() == State.STARTED + if (existing.state() == SnapshotsInProgress.State.STARTED && snapshotIdsRequiringCleanup.contains(existing.snapshot().getSnapshotId())) { // snapshot is started - mark every non completed shard as aborted final SnapshotsInProgress.Entry abortedEntry = existing.abort(); @@ -2257,7 +2277,7 @@ private static boolean isWritingToRepository(SnapshotsInProgress.Entry entry) { // Entry is writing to the repo because it's finalizing on master return true; } - for 
(ShardSnapshotStatus value : entry.shardsByRepoShardId().values()) { + for (ShardSnapshotStatus value : entry.shardSnapshotStatusByRepoShardId().values()) { if (value.isActive()) { // Entry is writing to the repo because it's writing to a shard on a data node or waiting to do so for a concrete shard return true; @@ -2746,7 +2766,8 @@ private SnapshotsInProgress updatedSnapshotsInProgress(ClusterState currentState if (entry.isClone()) { // Collect waiting shards from that entry that we can assign now that we are done with the deletion final List canBeUpdated = new ArrayList<>(); - for (Map.Entry value : entry.shardsByRepoShardId().entrySet()) { + for (Map.Entry value : entry.shardSnapshotStatusByRepoShardId() + .entrySet()) { if (value.getValue().equals(ShardSnapshotStatus.UNASSIGNED_QUEUED) && reassignedShardIds.contains(value.getKey()) == false) { canBeUpdated.add(value.getKey()); @@ -2762,7 +2783,7 @@ private SnapshotsInProgress updatedSnapshotsInProgress(ClusterState currentState inFlightShardStates = InFlightShardSnapshotStates.forEntries(snapshotsInProgress.forRepo(repoName)); } final ImmutableOpenMap.Builder updatedAssignmentsBuilder = - ImmutableOpenMap.builder(entry.shardsByRepoShardId()); + ImmutableOpenMap.builder(entry.shardSnapshotStatusByRepoShardId()); for (RepositoryShardId shardId : canBeUpdated) { if (inFlightShardStates.isActive(shardId.indexName(), shardId.shardId()) == false) { markShardReassigned(shardId, reassignedShardIds); @@ -2785,7 +2806,8 @@ private SnapshotsInProgress updatedSnapshotsInProgress(ClusterState currentState } else { // Collect waiting shards that in entry that we can assign now that we are done with the deletion final List canBeUpdated = new ArrayList<>(); - for (Map.Entry value : entry.shardsByRepoShardId().entrySet()) { + for (Map.Entry value : entry.shardSnapshotStatusByRepoShardId() + .entrySet()) { final RepositoryShardId repositoryShardId = value.getKey(); if (value.getValue().equals(ShardSnapshotStatus.UNASSIGNED_QUEUED) && reassignedShardIds.contains(repositoryShardId) == false) { @@ -3272,7 +3294,12 @@ SnapshotsInProgress.Entry computeUpdatedEntry() { if (entry.snapshot().getSnapshotId().equals(update.snapshot.getSnapshotId())) { // update a currently running shard level operation if (update.isClone()) { - executeShardSnapshotUpdate(entry.shardsByRepoShardId(), this::clonesBuilder, update, update.repoShardId); + executeShardSnapshotUpdate( + entry.shardSnapshotStatusByRepoShardId(), + this::clonesBuilder, + update, + update.repoShardId + ); } else { executeShardSnapshotUpdate(entry.shards(), this::shardsBuilder, update, update.shardId); } @@ -3398,7 +3425,7 @@ private void tryStartNextTaskAfterCloneUpdated(RepositoryShardId repoShardId, Sh // start a shard snapshot or clone operation on the current entry if (entry.isClone() == false) { tryStartSnapshotAfterCloneFinish(repoShardId, updatedState.generation()); - } else if (isQueued(entry.shardsByRepoShardId().get(repoShardId))) { + } else if (isQueued(entry.shardSnapshotStatusByRepoShardId().get(repoShardId))) { final String localNodeId = initialState.nodes().getLocalNodeId(); assert updatedState.nodeId().equals(localNodeId) : "Clone updated with node id [" + updatedState.nodeId() + "] but local node id is [" + localNodeId + "]"; @@ -3412,7 +3439,7 @@ private void tryStartNextTaskAfterSnapshotUpdated(ShardId shardId, ShardSnapshot final IndexId indexId = entry.indices().get(shardId.getIndexName()); if (indexId != null) { final RepositoryShardId repoShardId = new RepositoryShardId(indexId, 
shardId.id()); - if (isQueued(entry.shardsByRepoShardId().get(repoShardId))) { + if (isQueued(entry.shardSnapshotStatusByRepoShardId().get(repoShardId))) { if (entry.isClone()) { // shard snapshot was completed, we check if we can start a clone operation for the same repo shard startShardOperation( @@ -3431,7 +3458,7 @@ private void tryStartNextTaskAfterSnapshotUpdated(ShardId shardId, ShardSnapshot private void tryStartSnapshotAfterCloneFinish(RepositoryShardId repoShardId, ShardGeneration generation) { assert entry.source() == null; // current entry is a snapshot operation so we must translate the repository shard id to a routing shard id - if (isQueued(entry.shardsByRepoShardId().get(repoShardId))) { + if (isQueued(entry.shardSnapshotStatusByRepoShardId().get(repoShardId))) { startShardSnapshot(repoShardId, generation); } } @@ -3467,7 +3494,7 @@ private void startShardSnapshot(RepositoryShardId repoShardId, ShardGeneration g private ImmutableOpenMap.Builder clonesBuilder() { assert shardsBuilder == null; if (clonesBuilder == null) { - clonesBuilder = ImmutableOpenMap.builder(entry.shardsByRepoShardId()); + clonesBuilder = ImmutableOpenMap.builder(entry.shardSnapshotStatusByRepoShardId()); } return clonesBuilder; } @@ -3620,9 +3647,9 @@ private void startExecutableClones(SnapshotsInProgress snapshotsInProgress, @Nul private void startExecutableClones(List entries) { for (SnapshotsInProgress.Entry entry : entries) { - if (entry.isClone() && entry.state() == State.STARTED) { + if (entry.isClone() && entry.state() == SnapshotsInProgress.State.STARTED) { // this is a clone, see if new work is ready - for (Map.Entry clone : entry.shardsByRepoShardId().entrySet()) { + for (Map.Entry clone : entry.shardSnapshotStatusByRepoShardId().entrySet()) { if (clone.getValue().state() == ShardState.INIT) { runReadyClone( entry.snapshot(), diff --git a/server/src/main/java/org/elasticsearch/snapshots/package-info.java b/server/src/main/java/org/elasticsearch/snapshots/package-info.java index 4c175bc88faf9..a6dc8021fcba8 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/package-info.java +++ b/server/src/main/java/org/elasticsearch/snapshots/package-info.java @@ -98,13 +98,13 @@ *
      *
    1. First, {@link org.elasticsearch.snapshots.SnapshotsService#cloneSnapshot} is invoked which will place a placeholder entry into * {@code SnapshotsInProgress} that does not yet contain any shard clone assignments. Note that unlike in the case of snapshot - * creation, the shard level clone tasks in {@link org.elasticsearch.cluster.SnapshotsInProgress.Entry#shardsByRepoShardId()} are not - * created in the initial cluster state update as is done for shard snapshot assignments in - * {@link org.elasticsearch.cluster.SnapshotsInProgress.Entry#shards}. This is due to the fact that shard snapshot assignments are - * computed purely from information in the current cluster state while shard clone assignments require information to be read from the - * repository, which is too slow of a process to be done inside a cluster state update. Loading this information ahead of creating a - * task in the cluster state, runs the risk of race conditions where the source snapshot is being deleted before the clone task is - * enqueued in the cluster state.
    2. + * creation, the shard level clone tasks in + * {@link org.elasticsearch.cluster.SnapshotsInProgress.Entry#shardSnapshotStatusByRepoShardId()} are not created in the initial cluster + * state update as is done for shard snapshot assignments in {@link org.elasticsearch.cluster.SnapshotsInProgress.Entry#shards}. This is + * due to the fact that shard snapshot assignments are computed purely from information in the current cluster state while shard clone + * assignments require information to be read from the repository, which is too slow of a process to be done inside a cluster state + * update. Loading this information ahead of creating a task in the cluster state, runs the risk of race conditions where the source + * snapshot is being deleted before the clone task is enqueued in the cluster state. *
    3. Once a placeholder task for the clone operation is put into the cluster state, we must determine the number of shards in each * index that is to be cloned as well as ensure the health of the index snapshots in the source snapshot. In order to determine the * shard count for each index that is to be cloned, we load the index metadata for each such index using the repository's diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java index 8c9cd8cd54500..54051f8311967 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java @@ -1378,7 +1378,7 @@ public TransportRequestHandler interceptHandler( .anyMatch( e -> e.snapshot().getSnapshotId().getName().equals(cloneName) && e.isClone() - && e.shardsByRepoShardId().isEmpty() == false + && e.shardSnapshotStatusByRepoShardId().isEmpty() == false ) ).addListener(l); client.admin() diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotsServiceTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotsServiceTests.java index bcc7a23bbec53..71c041d21a825 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotsServiceTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotsServiceTests.java @@ -315,7 +315,8 @@ public void testCompletedSnapshotStartsClone() throws Exception { assertThat(completedClone.state(), is(SnapshotsInProgress.State.SUCCESS)); final SnapshotsInProgress.Entry startedSnapshot = snapshotsInProgress.forRepo(repoName).get(1); assertThat(startedSnapshot.state(), is(SnapshotsInProgress.State.STARTED)); - final SnapshotsInProgress.ShardSnapshotStatus shardCloneStatus = startedSnapshot.shardsByRepoShardId().get(repositoryShardId); + final SnapshotsInProgress.ShardSnapshotStatus shardCloneStatus = startedSnapshot.shardSnapshotStatusByRepoShardId() + .get(repositoryShardId); assertThat(shardCloneStatus.state(), is(SnapshotsInProgress.ShardState.INIT)); assertThat(shardCloneStatus.nodeId(), is(updatedClusterState.nodes().getLocalNodeId())); assertIsNoop(updatedClusterState, completeShard); @@ -397,7 +398,7 @@ public void testCompletedCloneStartsNextClone() throws Exception { assertThat(completedClone.state(), is(SnapshotsInProgress.State.SUCCESS)); final SnapshotsInProgress.Entry startedSnapshot = snapshotsInProgress.forRepo(repoName).get(1); assertThat(startedSnapshot.state(), is(SnapshotsInProgress.State.STARTED)); - assertThat(startedSnapshot.shardsByRepoShardId().get(shardId1).state(), is(SnapshotsInProgress.ShardState.INIT)); + assertThat(startedSnapshot.shardSnapshotStatusByRepoShardId().get(shardId1).state(), is(SnapshotsInProgress.ShardState.INIT)); assertIsNoop(updatedClusterState, completeShardClone); } diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java index ced1de370e0dd..c53d85a043128 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/ESBlobStoreRepositoryIntegTestCase.java @@ -607,7 +607,7 @@ public void testDanglingShardLevelBlobCleanup() throws Exception { // Prepare to compute the expected blobs final var 
shardGeneration = Objects.requireNonNull(getRepositoryData(repo).shardGenerations().getShardGen(indexId, 0)); final var snapBlob = Strings.format(SNAPSHOT_NAME_FORMAT, snapshot2Info.snapshotId().getUUID()); - final var indexBlob = Strings.format(SNAPSHOT_INDEX_NAME_FORMAT, shardGeneration.toBlobNamePart()); + final var indexBlob = Strings.format(SNAPSHOT_INDEX_NAME_FORMAT, shardGeneration.getGenerationUUID()); for (var fileInfos : List.of( // The expected blobs according to the BlobStoreIndexShardSnapshot (snap-UUID.dat) blob From 27e7601698e3a05e8c6d3e33b30f9ce5f6dbecd0 Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Wed, 17 Jul 2024 20:55:14 -0400 Subject: [PATCH 33/65] Directly download commercial ip geolocation databases from providers (#110844) Co-authored-by: Keith Massey --- docs/changelog/110844.yaml | 5 + .../authorization/privileges.asciidoc | 2 +- .../geoip/EnterpriseGeoIpDownloaderIT.java | 194 +++++++ .../ingest/geoip/GeoIpDownloaderIT.java | 30 +- .../src/main/java/module-info.java | 1 + .../ingest/geoip/DatabaseNodeService.java | 69 ++- .../geoip/EnterpriseGeoIpDownloader.java | 474 +++++++++++++++ ...EnterpriseGeoIpDownloaderTaskExecutor.java | 257 +++++++++ .../geoip/EnterpriseGeoIpTaskState.java | 153 +++++ .../ingest/geoip/GeoIpDownloader.java | 10 +- .../geoip/GeoIpDownloaderTaskExecutor.java | 2 +- .../ingest/geoip/GeoIpTaskState.java | 39 +- .../ingest/geoip/HttpClient.java | 26 + .../ingest/geoip/IngestGeoIpMetadata.java | 157 +++++ .../ingest/geoip/IngestGeoIpPlugin.java | 75 ++- .../geoip/direct/DatabaseConfiguration.java | 209 +++++++ .../direct/DatabaseConfigurationMetadata.java | 84 +++ .../DeleteDatabaseConfigurationAction.java | 70 +++ .../GetDatabaseConfigurationAction.java | 142 +++++ .../PutDatabaseConfigurationAction.java | 87 +++ ...RestDeleteDatabaseConfigurationAction.java | 46 ++ .../RestGetDatabaseConfigurationAction.java | 47 ++ .../RestPutDatabaseConfigurationAction.java | 52 ++ ...portDeleteDatabaseConfigurationAction.java | 128 +++++ ...ansportGetDatabaseConfigurationAction.java | 109 ++++ ...ansportPutDatabaseConfigurationAction.java | 178 ++++++ .../geoip/EnterpriseGeoIpDownloaderTests.java | 538 ++++++++++++++++++ ...priseGeoIpTaskStateSerializationTests.java | 72 +++ .../ingest/geoip/GeoIpDownloaderTests.java | 49 ++ .../geoip/IngestGeoIpMetadataTests.java | 91 +++ .../DatabaseConfigurationMetadataTests.java | 74 +++ .../direct/DatabaseConfigurationTests.java | 86 +++ ...rtPutDatabaseConfigurationActionTests.java | 69 +++ .../IngestGeoIpClientYamlTestSuiteIT.java | 5 + .../test/ingest_geoip/40_geoip_databases.yml | 72 +++ .../api/ingest.delete_geoip_database.json | 31 + .../api/ingest.get_geoip_database.json | 37 ++ .../api/ingest.put_geoip_database.json | 35 ++ server/src/main/java/module-info.java | 2 + .../org/elasticsearch/TransportVersions.java | 1 + .../ingest/EnterpriseGeoIpTask.java | 86 +++ .../ingest/IngestGeoIpFeatures.java | 22 + ...lasticsearch.features.FeatureSpecification | 1 + .../ingest/IngestServiceTests.java | 6 +- .../geoip/EnterpriseGeoIpHttpFixture.java | 125 ++++ .../resources/geoip-fixture/GeoIP2-City.tgz | Bin 0 -> 6377 bytes .../elasticsearch/xpack/core/XPackField.java | 1 + .../geoip-enterprise-downloader/build.gradle | 19 + .../geoip/EnterpriseDownloaderPlugin.java | 48 ++ ...erpriseGeoIpDownloaderLicenseListener.java | 145 +++++ ...seGeoIpDownloaderLicenseListenerTests.java | 219 +++++++ .../xpack/security/operator/Constants.java | 3 + 52 files changed, 4429 insertions(+), 54 deletions(-) create mode 100644 
docs/changelog/110844.yaml create mode 100644 modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderIT.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskState.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadata.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadata.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DeleteDatabaseConfigurationAction.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestDeleteDatabaseConfigurationAction.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestPutDatabaseConfigurationAction.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java create mode 100644 modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationAction.java create mode 100644 modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java create mode 100644 modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskStateSerializationTests.java create mode 100644 modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java create mode 100644 modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java create mode 100644 modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java create mode 100644 modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationActionTests.java create mode 100644 modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/ingest.delete_geoip_database.json create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/ingest.get_geoip_database.json create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/ingest.put_geoip_database.json create mode 100644 server/src/main/java/org/elasticsearch/ingest/EnterpriseGeoIpTask.java create mode 100644 server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java create mode 100644 
test/fixtures/geoip-fixture/src/main/java/fixture/geoip/EnterpriseGeoIpHttpFixture.java create mode 100644 test/fixtures/geoip-fixture/src/main/resources/geoip-fixture/GeoIP2-City.tgz create mode 100644 x-pack/plugin/geoip-enterprise-downloader/build.gradle create mode 100644 x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseDownloaderPlugin.java create mode 100644 x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListener.java create mode 100644 x-pack/plugin/geoip-enterprise-downloader/src/test/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListenerTests.java diff --git a/docs/changelog/110844.yaml b/docs/changelog/110844.yaml new file mode 100644 index 0000000000000..ea879f13f3e67 --- /dev/null +++ b/docs/changelog/110844.yaml @@ -0,0 +1,5 @@ +pr: 110844 +summary: Directly download commercial ip geolocation databases from providers +area: Ingest Node +type: feature +issues: [] diff --git a/docs/reference/security/authorization/privileges.asciidoc b/docs/reference/security/authorization/privileges.asciidoc index 44897baa8cb4a..145bd8ebc06bb 100644 --- a/docs/reference/security/authorization/privileges.asciidoc +++ b/docs/reference/security/authorization/privileges.asciidoc @@ -282,7 +282,7 @@ status of {Ilm} This privilege is not available in {serverless-full}. `read_pipeline`:: -Read-only access to ingest pipline (get, simulate). +Read-only access to ingest pipeline (get, simulate). `read_slm`:: All read-only {slm-init} actions, such as getting policies and checking the diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderIT.java new file mode 100644 index 0000000000000..d9665e180d960 --- /dev/null +++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderIT.java @@ -0,0 +1,194 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import fixture.geoip.EnterpriseGeoIpHttpFixture; + +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ResourceAlreadyExistsException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.bulk.BulkItemResponse; +import org.elasticsearch.action.bulk.BulkRequest; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.get.GetRequest; +import org.elasticsearch.action.get.GetResponse; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.ingest.PutPipelineRequest; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.MockSecureSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.core.Booleans; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.ingest.EnterpriseGeoIpTask; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction; +import org.elasticsearch.persistent.PersistentTasksService; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.reindex.ReindexPlugin; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.transport.RemoteTransportException; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.json.JsonXContent; +import org.junit.ClassRule; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderTaskExecutor.MAXMIND_LICENSE_KEY_SETTING; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; + +public class EnterpriseGeoIpDownloaderIT extends ESIntegTestCase { + + private static final String DATABASE_TYPE = "GeoIP2-City"; + private static final boolean useFixture = Booleans.parseBoolean(System.getProperty("geoip_use_service", "false")) == false; + + @ClassRule + public static final EnterpriseGeoIpHttpFixture fixture = new EnterpriseGeoIpHttpFixture(useFixture, DATABASE_TYPE); + + protected String getEndpoint() { + return useFixture ? 
fixture.getAddress() : null; + } + + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + MockSecureSettings secureSettings = new MockSecureSettings(); + secureSettings.setString(MAXMIND_LICENSE_KEY_SETTING.getKey(), "license_key"); + Settings.Builder builder = Settings.builder(); + builder.setSecureSettings(secureSettings) + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + .put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), true); + if (getEndpoint() != null) { + // note: this is using the enterprise fixture for the regular downloader, too, as + // a slightly hacky way of making the regular downloader not actually download any files + builder.put(GeoIpDownloader.ENDPOINT_SETTING.getKey(), getEndpoint()); + } + return builder.build(); + } + + @SuppressWarnings("unchecked") + protected Collection> nodePlugins() { + // the reindex plugin is (somewhat surprisingly) necessary in order to be able to delete-by-query, + // which modules/ingest-geoip does to delete old chunks + return CollectionUtils.appendToCopyNoNullElements(super.nodePlugins(), IngestGeoIpPlugin.class, ReindexPlugin.class); + } + + @SuppressWarnings("unchecked") + public void testEnterpriseDownloaderTask() throws Exception { + /* + * This test starts the enterprise geoip downloader task, and creates a database configuration. Then it creates an ingest + * pipeline that references that database, and ingests a single document using that pipeline. It then asserts that the document + * was updated with information from the database. + * Note that the "enterprise database" is actually just a geolite database being loaded by the GeoIpHttpFixture. + */ + if (getEndpoint() != null) { + EnterpriseGeoIpDownloader.DEFAULT_MAXMIND_ENDPOINT = getEndpoint(); + } + final String pipelineName = "enterprise_geoip_pipeline"; + final String indexName = "enterprise_geoip_test_index"; + final String sourceField = "ip"; + final String targetField = "ip-city"; + + startEnterpriseGeoIpDownloaderTask(); + configureDatabase(DATABASE_TYPE); + createGeoIpPipeline(pipelineName, DATABASE_TYPE, sourceField, targetField); + String documentId = ingestDocument(indexName, pipelineName, sourceField); + GetResponse getResponse = client().get(new GetRequest(indexName, documentId)).actionGet(); + Map returnedSource = getResponse.getSource(); + assertNotNull(returnedSource); + Object targetFieldValue = returnedSource.get(targetField); + assertNotNull(targetFieldValue); + assertThat(((Map) targetFieldValue).get("organization_name"), equalTo("Bredband2 AB")); + } + + private void startEnterpriseGeoIpDownloaderTask() { + PersistentTasksService persistentTasksService = internalCluster().getInstance(PersistentTasksService.class); + persistentTasksService.sendStartRequest( + ENTERPRISE_GEOIP_DOWNLOADER, + ENTERPRISE_GEOIP_DOWNLOADER, + new EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams(), + TimeValue.MAX_VALUE, + ActionListener.wrap(r -> logger.debug("Started enterprise geoip downloader task"), e -> { + Throwable t = e instanceof RemoteTransportException ? 
ExceptionsHelper.unwrapCause(e) : e; + if (t instanceof ResourceAlreadyExistsException == false) { + logger.error("failed to create enterprise geoip downloader task", e); + } + }) + ); + } + + private void configureDatabase(String databaseType) throws Exception { + admin().cluster() + .execute( + PutDatabaseConfigurationAction.INSTANCE, + new PutDatabaseConfigurationAction.Request( + TimeValue.MAX_VALUE, + TimeValue.MAX_VALUE, + new DatabaseConfiguration("test", databaseType, new DatabaseConfiguration.Maxmind("test_account")) + ) + ) + .actionGet(); + ensureGreen(GeoIpDownloader.DATABASES_INDEX); + assertBusy(() -> { + SearchResponse searchResponse = client().search(new SearchRequest(GeoIpDownloader.DATABASES_INDEX)).actionGet(); + try { + assertThat(searchResponse.getHits().getHits().length, equalTo(1)); + } finally { + searchResponse.decRef(); + } + }); + } + + private void createGeoIpPipeline(String pipelineName, String databaseType, String sourceField, String targetField) throws IOException { + final BytesReference bytes; + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + builder.startObject(); + { + builder.field("description", "test"); + builder.startArray("processors"); + { + builder.startObject(); + { + builder.startObject("geoip"); + { + builder.field("field", sourceField); + builder.field("target_field", targetField); + builder.field("database_file", databaseType + ".mmdb"); + } + builder.endObject(); + } + builder.endObject(); + } + builder.endArray(); + } + builder.endObject(); + bytes = BytesReference.bytes(builder); + } + assertAcked(clusterAdmin().putPipeline(new PutPipelineRequest(pipelineName, bytes, XContentType.JSON)).actionGet()); + } + + private String ingestDocument(String indexName, String pipelineName, String sourceField) { + BulkRequest bulkRequest = new BulkRequest(); + bulkRequest.add( + new IndexRequest(indexName).source("{\"" + sourceField + "\": \"89.160.20.128\"}", XContentType.JSON).setPipeline(pipelineName) + ); + BulkResponse response = client().bulk(bulkRequest).actionGet(); + BulkItemResponse[] bulkItemResponses = response.getItems(); + assertThat(bulkItemResponses.length, equalTo(1)); + assertThat(bulkItemResponses[0].status(), equalTo(RestStatus.CREATED)); + return bulkItemResponses[0].getId(); + } +} diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java index 9eab00fbadf20..f7ab384c69bf1 100644 --- a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java +++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java @@ -152,9 +152,9 @@ public void testInvalidTimestamp() throws Exception { updateClusterSettings(Settings.builder().put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), true)); assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); }, 2, TimeUnit.MINUTES); @@ -227,9 +227,9 @@ public void testGeoIpDatabasesDownload() throws Exception { updateClusterSettings(Settings.builder().put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), 
true)); assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); putGeoIpPipeline(); // This is to work around the race condition described in #92888 }, 2, TimeUnit.MINUTES); @@ -238,9 +238,9 @@ public void testGeoIpDatabasesDownload() throws Exception { assertBusy(() -> { try { GeoIpTaskState state = (GeoIpTaskState) getTask().getState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); GeoIpTaskState.Metadata metadata = state.getDatabases().get(id); int size = metadata.lastChunk() - metadata.firstChunk() + 1; @@ -301,9 +301,9 @@ public void testGeoIpDatabasesDownloadNoGeoipProcessors() throws Exception { assertNotNull(getTask().getState()); // removing all geoip processors should not result in the task being stopped assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); }); } @@ -337,9 +337,9 @@ public void testDoNotDownloadDatabaseOnPipelineCreation() throws Exception { assertAcked(indicesAdmin().prepareUpdateSettings(indexIdentifier).setSettings(indexSettings).get()); assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); }, 2, TimeUnit.MINUTES); diff --git a/modules/ingest-geoip/src/main/java/module-info.java b/modules/ingest-geoip/src/main/java/module-info.java index fa0b0266414f0..4d0acefcb6c9f 100644 --- a/modules/ingest-geoip/src/main/java/module-info.java +++ b/modules/ingest-geoip/src/main/java/module-info.java @@ -15,5 +15,6 @@ requires com.maxmind.geoip2; requires com.maxmind.db; + exports org.elasticsearch.ingest.geoip.direct to org.elasticsearch.server; exports org.elasticsearch.ingest.geoip.stats to org.elasticsearch.server; } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java index efae8fa0c50ca..dcb882ede230c 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java @@ -24,6 +24,7 @@ import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.CheckedRunnable; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Tuple; import org.elasticsearch.env.Environment; import org.elasticsearch.gateway.GatewayService; 
import org.elasticsearch.index.Index; @@ -52,7 +53,6 @@ import java.util.Collection; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -64,6 +64,7 @@ import java.util.zip.GZIPInputStream; import static org.elasticsearch.core.Strings.format; +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpTaskState.getEnterpriseGeoIpTaskState; import static org.elasticsearch.ingest.geoip.GeoIpTaskState.getGeoIpTaskState; /** @@ -183,13 +184,14 @@ public Boolean isValid(String databaseFile) { if (state == null) { return true; } + GeoIpTaskState.Metadata metadata = state.getDatabases().get(databaseFile); // we never remove metadata from cluster state, if metadata is null we deal with built-in database, which is always valid if (metadata == null) { return true; } - boolean valid = metadata.isValid(currentState.metadata().settings()); + boolean valid = metadata.isNewEnough(currentState.metadata().settings()); if (valid && metadata.isCloseToExpiration()) { HeaderWarning.addWarning( "database [{}] was not updated for over 25 days, geoip processor will stop working if there is no update for 30 days", @@ -269,20 +271,52 @@ void checkDatabases(ClusterState state) { } } - GeoIpTaskState taskState = getGeoIpTaskState(state); - if (taskState == null) { - // Note: an empty state will purge stale entries in databases map - taskState = GeoIpTaskState.EMPTY; + // we'll consult each of the geoip downloaders to build up a list of database metadatas to work with + List> validMetadatas = new ArrayList<>(); + + // process the geoip task state for the (ordinary) geoip downloader + { + GeoIpTaskState taskState = getGeoIpTaskState(state); + if (taskState == null) { + // Note: an empty state will purge stale entries in databases map + taskState = GeoIpTaskState.EMPTY; + } + validMetadatas.addAll( + taskState.getDatabases() + .entrySet() + .stream() + .filter(e -> e.getValue().isNewEnough(state.getMetadata().settings())) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList() + ); + } + + // process the geoip task state for the enterprise geoip downloader + { + EnterpriseGeoIpTaskState taskState = getEnterpriseGeoIpTaskState(state); + if (taskState == null) { + // Note: an empty state will purge stale entries in databases map + taskState = EnterpriseGeoIpTaskState.EMPTY; + } + validMetadatas.addAll( + taskState.getDatabases() + .entrySet() + .stream() + .filter(e -> e.getValue().isNewEnough(state.getMetadata().settings())) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList() + ); } - taskState.getDatabases().entrySet().stream().filter(e -> e.getValue().isValid(state.getMetadata().settings())).forEach(e -> { - String name = e.getKey(); - GeoIpTaskState.Metadata metadata = e.getValue(); + // run through all the valid metadatas, regardless of source, and retrieve them + validMetadatas.forEach(e -> { + String name = e.v1(); + GeoIpTaskState.Metadata metadata = e.v2(); DatabaseReaderLazyLoader reference = databases.get(name); String remoteMd5 = metadata.md5(); String localMd5 = reference != null ? 
reference.getMd5() : null; if (Objects.equals(localMd5, remoteMd5)) { - logger.debug("Current reference of [{}] is up to date [{}] with was recorded in CS [{}]", name, localMd5, remoteMd5); + logger.debug("[{}] is up to date [{}] with cluster state [{}]", name, localMd5, remoteMd5); return; } @@ -293,15 +327,14 @@ void checkDatabases(ClusterState state) { } }); + // TODO perhaps we need to handle the license flap persistent task state better than we do + // i think the ideal end state is that we *do not* drop the files that the enterprise downloader + // handled if they fall out -- which means we need to track that in the databases map itself + + // start with the list of all databases we currently know about in this service, + // then drop the ones that didn't check out as valid from the task states List staleEntries = new ArrayList<>(databases.keySet()); - staleEntries.removeAll( - taskState.getDatabases() - .entrySet() - .stream() - .filter(e -> e.getValue().isValid(state.getMetadata().settings())) - .map(Map.Entry::getKey) - .collect(Collectors.toSet()) - ); + staleEntries.removeAll(validMetadatas.stream().map(Tuple::v1).collect(Collectors.toSet())); removeStaleEntries(staleEntries); } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java new file mode 100644 index 0000000000000..9645e34751642 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java @@ -0,0 +1,474 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.flush.FlushRequest; +import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.hash.MessageDigests; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.index.reindex.DeleteByQueryAction; +import org.elasticsearch.index.reindex.DeleteByQueryRequest; +import org.elasticsearch.ingest.geoip.GeoIpTaskState.Metadata; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata; +import org.elasticsearch.persistent.AllocatedPersistentTask; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask; +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.threadpool.Scheduler; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xcontent.XContentType; + +import java.io.IOException; +import java.io.InputStream; +import java.net.PasswordAuthentication; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderTaskExecutor.MAXMIND_SETTINGS_PREFIX; + +/** + * Main component responsible for downloading new GeoIP databases. + * New databases are downloaded in chunks and stored in .geoip_databases index + * Downloads are verified against MD5 checksum provided by the server + * Current state of all stored databases is stored in cluster state in persistent task state + */ +public class EnterpriseGeoIpDownloader extends AllocatedPersistentTask { + + private static final Logger logger = LogManager.getLogger(EnterpriseGeoIpDownloader.class); + private static final Pattern CHECKSUM_PATTERN = Pattern.compile("(\\w{64})\\s\\s(.*)"); + + // for overriding in tests + static String DEFAULT_MAXMIND_ENDPOINT = System.getProperty( + MAXMIND_SETTINGS_PREFIX + "endpoint.default", + "https://download.maxmind.com/geoip/databases" + ); + // n.b. 
a future enhancement might be to allow for a MAXMIND_ENDPOINT_SETTING, but + // at the moment this is an unsupported system property for use in tests (only) + + static String downloadUrl(final String name, final String suffix) { + String endpointPattern = DEFAULT_MAXMIND_ENDPOINT; + if (endpointPattern.contains("%")) { + throw new IllegalArgumentException("Invalid endpoint [" + endpointPattern + "]"); + } + if (endpointPattern.endsWith("/") == false) { + endpointPattern += "/"; + } + endpointPattern += "%s/download?suffix=%s"; + + // at this point the pattern looks like this (in the default case): + // https://download.maxmind.com/geoip/databases/%s/download?suffix=%s + + return Strings.format(endpointPattern, name, suffix); + } + + static final String DATABASES_INDEX = ".geoip_databases"; + static final int MAX_CHUNK_SIZE = 1024 * 1024; + + private final Client client; + private final HttpClient httpClient; + private final ClusterService clusterService; + private final ThreadPool threadPool; + + // visible for testing + protected volatile EnterpriseGeoIpTaskState state; + private volatile Scheduler.ScheduledCancellable scheduled; + private final Supplier pollIntervalSupplier; + private final Function credentialsBuilder; + + EnterpriseGeoIpDownloader( + Client client, + HttpClient httpClient, + ClusterService clusterService, + ThreadPool threadPool, + long id, + String type, + String action, + String description, + TaskId parentTask, + Map headers, + Supplier pollIntervalSupplier, + Function credentialsBuilder + ) { + super(id, type, action, description, parentTask, headers); + this.client = client; + this.httpClient = httpClient; + this.clusterService = clusterService; + this.threadPool = threadPool; + this.pollIntervalSupplier = pollIntervalSupplier; + this.credentialsBuilder = credentialsBuilder; + } + + void setState(EnterpriseGeoIpTaskState state) { + // this is for injecting the state in GeoIpDownloaderTaskExecutor#nodeOperation just after the task instance has been created + // by the PersistentTasksNodeService -- since the GeoIpDownloader is newly created, the state will be null, and the passed-in + // state cannot be null + assert this.state == null + : "setState() cannot be called when state is already non-null. This most likely happened because setState() was called twice"; + assert state != null : "Should never call setState with a null state. 
Pass an EnterpriseGeoIpTaskState.EMPTY instead."; + this.state = state; + } + + // visible for testing + void updateDatabases() throws IOException { + var clusterState = clusterService.state(); + var geoipIndex = clusterState.getMetadata().getIndicesLookup().get(EnterpriseGeoIpDownloader.DATABASES_INDEX); + if (geoipIndex != null) { + logger.trace("the geoip index [{}] exists", EnterpriseGeoIpDownloader.DATABASES_INDEX); + if (clusterState.getRoutingTable().index(geoipIndex.getWriteIndex()).allPrimaryShardsActive() == false) { + logger.debug("not updating databases because not all primary shards of [{}] index are active yet", DATABASES_INDEX); + return; + } + var blockException = clusterState.blocks().indexBlockedException(ClusterBlockLevel.WRITE, geoipIndex.getWriteIndex().getName()); + if (blockException != null) { + throw blockException; + } + } + + logger.trace("Updating geoip databases"); + IngestGeoIpMetadata geoIpMeta = clusterState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + // if there are entries in the cs that aren't in the persistent task state, + // then download those (only) + // --- + // if there are in the persistent task state, that aren't in the cluster state + // then nuke those (only) + // --- + // else, just download everything + boolean addedSomething = false; + { + Set existingDatabaseNames = state.getDatabases().keySet(); + for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { + final String id = entry.getKey(); + DatabaseConfiguration database = entry.getValue().database(); + if (existingDatabaseNames.contains(database.name() + ".mmdb") == false) { + logger.debug("A new database appeared [{}]", database.name()); + + final String accountId = database.maxmind().accountId(); + try (HttpClient.PasswordAuthenticationHolder holder = credentialsBuilder.apply(accountId)) { + if (holder == null) { + logger.warn("No credentials found to download database [{}], skipping download...", id); + } else { + processDatabase(holder.get(), database); + addedSomething = true; + } + } + } + } + } + + boolean droppedSomething = false; + { + // rip anything out of the task state that doesn't match what's in the cluster state, + // that is, if there's no longer an entry for a database in the repository, + // then drop it from the task state, too + Set databases = geoIpMeta.getDatabases() + .values() + .stream() + .map(c -> c.database().name() + ".mmdb") + .collect(Collectors.toSet()); + EnterpriseGeoIpTaskState _state = state; + Collection> metas = _state.getDatabases() + .entrySet() + .stream() + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList(); + for (Tuple metaTuple : metas) { + String name = metaTuple.v1(); + Metadata meta = metaTuple.v2(); + if (databases.contains(name) == false) { + logger.debug("Dropping [{}], databases was {}", name, databases); + _state = _state.remove(name); + deleteOldChunks(name, meta.lastChunk() + 1); + droppedSomething = true; + } + } + if (droppedSomething) { + state = _state; + updateTaskState(); + } + } + + if (addedSomething == false && droppedSomething == false) { + RuntimeException accumulator = null; + for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { + final String id = entry.getKey(); + DatabaseConfiguration database = entry.getValue().database(); + + final String accountId = database.maxmind().accountId(); + try (HttpClient.PasswordAuthenticationHolder holder = credentialsBuilder.apply(accountId)) { + if (holder == null) { + logger.warn("No credentials found to download 
database [{}], skipping download...", id); + } else { + processDatabase(holder.get(), database); + } + } catch (Exception e) { + accumulator = ExceptionsHelper.useOrSuppress(accumulator, ExceptionsHelper.convertToRuntime(e)); + } + } + if (accumulator != null) { + throw accumulator; + } + } + } + + /** + * This method fetches the sha256 file and tar.gz file for the given database from the Maxmind endpoint, then indexes that tar.gz + * file into the .geoip_databases Elasticsearch index, deleting any old versions of the database tar.gz from the index if they exist. + * If the computed sha256 does not match the expected sha256, an error will be logged and the database will not be put into the + * Elasticsearch index. + *
      + * As an implementation detail, this method retrieves the sha256 checksum of the database to download and then invokes + * {@link EnterpriseGeoIpDownloader#processDatabase(PasswordAuthentication, String, String, String)} with that checksum, deferring to + * that method to actually download and process the tar.gz itself. + * + * @param auth The credentials to use to download from the Maxmind endpoint + * @param database The database to be downloaded from Maxmind and indexed into an Elasticsearch index + * @throws IOException If there is an error fetching the sha256 file + */ + void processDatabase(PasswordAuthentication auth, DatabaseConfiguration database) throws IOException { + final String name = database.name(); + logger.debug("Processing database [{}] for configuration [{}]", name, database.id()); + + final String sha256Url = downloadUrl(name, "tar.gz.sha256"); + final String tgzUrl = downloadUrl(name, "tar.gz"); + + String result = new String(httpClient.getBytes(auth, sha256Url), StandardCharsets.UTF_8).trim(); // this throws if the auth is bad + var matcher = CHECKSUM_PATTERN.matcher(result); + boolean match = matcher.matches(); + if (match == false) { + throw new RuntimeException("Unexpected sha256 response from [" + sha256Url + "]"); + } + final String sha256 = matcher.group(1); + // the name that comes from the enterprise downloader cluster state doesn't include the .mmdb extension, + // but the downloading and indexing of database code expects it to be there, so we add it on here before further processing + processDatabase(auth, name + ".mmdb", sha256, tgzUrl); + } + + /** + * This method fetches the tar.gz file for the given database from the Maxmind endpoint, then indexes that tar.gz + * file into the .geoip_databases Elasticsearch index, deleting any old versions of the database tar.gz from the index if they exist. 
+     *
+     * @param auth The credentials to use to download from the Maxmind endpoint
+     * @param name The name of the database to be downloaded from Maxmind and indexed into an Elasticsearch index
+     * @param sha256 The sha256 to compare to the computed sha256 of the downloaded tar.gz file
+     * @param url The URL for the Maxmind endpoint from which the database's tar.gz will be downloaded
+     */
+    private void processDatabase(PasswordAuthentication auth, String name, String sha256, String url) {
+        Metadata metadata = state.getDatabases().getOrDefault(name, Metadata.EMPTY);
+        if (Objects.equals(metadata.sha256(), sha256)) {
+            updateTimestamp(name, metadata);
+            return;
+        }
+        logger.debug("downloading geoip database [{}]", name);
+        long start = System.currentTimeMillis();
+        try (InputStream is = httpClient.get(auth, url)) {
+            int firstChunk = metadata.lastChunk() + 1; // if there is no metadata, then Metadata.EMPTY + 1 = 0
+            Tuple<Integer, String> tuple = indexChunks(name, is, firstChunk, MessageDigests.sha256(), sha256, start);
+            int lastChunk = tuple.v1();
+            String md5 = tuple.v2();
+            if (lastChunk > firstChunk) {
+                state = state.put(name, new Metadata(start, firstChunk, lastChunk - 1, md5, start, sha256));
+                updateTaskState();
+                logger.info("successfully downloaded geoip database [{}]", name);
+                deleteOldChunks(name, firstChunk);
+            }
+        } catch (Exception e) {
+            logger.error(() -> "error downloading geoip database [" + name + "]", e);
+        }
+    }
+
+    // visible for testing
+    void deleteOldChunks(String name, int firstChunk) {
+        BoolQueryBuilder queryBuilder = new BoolQueryBuilder().filter(new MatchQueryBuilder("name", name))
+            .filter(new RangeQueryBuilder("chunk").to(firstChunk, false));
+        DeleteByQueryRequest request = new DeleteByQueryRequest();
+        request.indices(DATABASES_INDEX);
+        request.setQuery(queryBuilder);
+        client.execute(
+            DeleteByQueryAction.INSTANCE,
+            request,
+            ActionListener.wrap(r -> {}, e -> logger.warn("could not delete old chunks for geoip database [" + name + "]", e))
+        );
+    }
+
+    // visible for testing
+    protected void updateTimestamp(String name, Metadata old) {
+        logger.debug("geoip database [{}] is up to date, updated timestamp", name);
+        state = state.put(
+            name,
+            new Metadata(old.lastUpdate(), old.firstChunk(), old.lastChunk(), old.md5(), System.currentTimeMillis(), old.sha256())
+        );
+        updateTaskState();
+    }
+
+    void updateTaskState() {
+        PlainActionFuture<PersistentTask<?>> future = new PlainActionFuture<>();
+        updatePersistentTaskState(state, future);
+        state = ((EnterpriseGeoIpTaskState) future.actionGet().getState());
+    }
+
+    // visible for testing
+    Tuple<Integer, String> indexChunks(
+        String name,
+        InputStream is,
+        int chunk,
+        @Nullable MessageDigest digest,
+        String expectedChecksum,
+        long timestamp
+    ) throws IOException {
+        MessageDigest md5 = MessageDigests.md5();
+        for (byte[] buf = getChunk(is); buf.length != 0; buf = getChunk(is)) {
+            md5.update(buf);
+            if (digest != null) {
+                digest.update(buf);
+            }
+            IndexRequest indexRequest = new IndexRequest(DATABASES_INDEX).id(name + "_" + chunk + "_" + timestamp)
+                .create(true)
+                .source(XContentType.SMILE, "name", name, "chunk", chunk, "data", buf);
+            client.index(indexRequest).actionGet();
+            chunk++;
+        }
+
+        // May take some time before automatic flush kicks in:
+        // (otherwise the translog will contain large documents for some time without good reason)
+        FlushRequest flushRequest = new FlushRequest(DATABASES_INDEX);
+        client.admin().indices().flush(flushRequest).actionGet();
+        // Ensure that the chunk documents are visible:
+        RefreshRequest refreshRequest = new
RefreshRequest(DATABASES_INDEX); + client.admin().indices().refresh(refreshRequest).actionGet(); + + String actualMd5 = MessageDigests.toHexString(md5.digest()); + String actualChecksum = digest == null ? actualMd5 : MessageDigests.toHexString(digest.digest()); + if (Objects.equals(expectedChecksum, actualChecksum) == false) { + throw new IOException("checksum mismatch, expected [" + expectedChecksum + "], actual [" + actualChecksum + "]"); + } + return Tuple.tuple(chunk, actualMd5); + } + + // visible for testing + static byte[] getChunk(InputStream is) throws IOException { + byte[] buf = new byte[MAX_CHUNK_SIZE]; + int chunkSize = 0; + while (chunkSize < MAX_CHUNK_SIZE) { + int read = is.read(buf, chunkSize, MAX_CHUNK_SIZE - chunkSize); + if (read == -1) { + break; + } + chunkSize += read; + } + if (chunkSize < MAX_CHUNK_SIZE) { + buf = Arrays.copyOf(buf, chunkSize); + } + return buf; + } + + /** + * Downloads the geoip databases now, and schedules them to be downloaded again after pollInterval. + */ + synchronized void runDownloader() { + // by the time we reach here, the state will never be null + assert this.state != null : "this.setState() is null. You need to call setState() before calling runDownloader()"; + + // there's a race condition between here and requestReschedule. originally this scheduleNextRun call was at the end of this + // block, but remember that updateDatabases can take seconds to run (it's downloading bytes from the internet), and so during the + // very first run there would be no future run scheduled to reschedule in requestReschedule. which meant that if you went from zero + // to N(>=2) databases in quick succession, then all but the first database wouldn't necessarily get downloaded, because the + // requestReschedule call in the EnterpriseGeoIpDownloaderTaskExecutor's clusterChanged wouldn't have a scheduled future run to + // reschedule. scheduling the next run at the beginning of this run means that there's a much smaller window (milliseconds?, rather + // than seconds) in which such a race could occur. technically there's a window here, still, but i think it's _greatly_ reduced. + scheduleNextRun(pollIntervalSupplier.get()); + // TODO regardless of the above comment, i like the idea of checking the lowest last-checked time and then running the math to get + // to the next interval from then -- maybe that's a neat future enhancement to add + + if (isCancelled() || isCompleted()) { + return; + } + try { + updateDatabases(); // n.b. this downloads bytes from the internet, it can take a while + } catch (Exception e) { + logger.error("exception during geoip databases update", e); + } + try { + cleanDatabases(); + } catch (Exception e) { + logger.error("exception during geoip databases cleanup", e); + } + } + + /** + * This method requests that the downloader be rescheduled to run immediately (presumably because a dynamic property supplied by + * pollIntervalSupplier or eagerDownloadSupplier has changed, or a pipeline with a geoip processor has been added). This method does + * nothing if this task is cancelled, completed, or has not yet been scheduled to run for the first time. It cancels any existing + * scheduled run. 
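A rough, self-contained sketch of the chunk walk that indexChunks and getChunk perform above: the downloaded archive is split into fixed-size buffers, and each buffer becomes one document whose id is <name>_<chunk>_<timestamp>. The MAX_CHUNK_SIZE value used below is hypothetical; the real constant is defined elsewhere in this class and is not shown in this excerpt.

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Arrays;

    class ChunkWalkSketch {
        static final int MAX_CHUNK_SIZE = 1024; // hypothetical value, for illustration only

        public static void main(String[] args) throws IOException {
            byte[] fakeTarGz = new byte[2500]; // stand-in for a downloaded database archive
            InputStream is = new ByteArrayInputStream(fakeTarGz);
            long timestamp = 1721174400000L;   // hypothetical download start time
            int chunk = 0;                     // a fresh database starts at chunk 0 (Metadata.EMPTY.lastChunk() + 1)
            for (byte[] buf = getChunk(is); buf.length != 0; buf = getChunk(is)) {
                // each buffer becomes one SMILE document in the .geoip_databases index
                System.out.println("GeoIP2-City.mmdb_" + chunk + "_" + timestamp + " -> " + buf.length + " bytes");
                chunk++;
            }
        }

        // same reading strategy as the getChunk method above: fill up to MAX_CHUNK_SIZE, a short final chunk is allowed
        static byte[] getChunk(InputStream is) throws IOException {
            byte[] buf = new byte[MAX_CHUNK_SIZE];
            int size = 0;
            while (size < MAX_CHUNK_SIZE) {
                int read = is.read(buf, size, MAX_CHUNK_SIZE - size);
                if (read == -1) {
                    break;
                }
                size += read;
            }
            return size < MAX_CHUNK_SIZE ? Arrays.copyOf(buf, size) : buf;
        }
    }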
+ */ + public void requestReschedule() { + if (isCancelled() || isCompleted()) { + return; + } + if (scheduled != null && scheduled.cancel()) { + scheduleNextRun(TimeValue.ZERO); + } + } + + private void cleanDatabases() { + List> expiredDatabases = state.getDatabases() + .entrySet() + .stream() + .filter(e -> e.getValue().isNewEnough(clusterService.state().metadata().settings()) == false) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList(); + expiredDatabases.forEach(e -> { + String name = e.v1(); + Metadata meta = e.v2(); + deleteOldChunks(name, meta.lastChunk() + 1); + state = state.put(name, new Metadata(meta.lastUpdate(), meta.firstChunk(), meta.lastChunk(), meta.md5(), meta.lastCheck() - 1)); + updateTaskState(); + }); + } + + @Override + protected void onCancelled() { + if (scheduled != null) { + scheduled.cancel(); + } + markAsCompleted(); + } + + private void scheduleNextRun(TimeValue time) { + if (threadPool.scheduler().isShutdown() == false) { + scheduled = threadPool.schedule(this::runDownloader, time, threadPool.generic()); + } + } + +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java new file mode 100644 index 0000000000000..8fc46fe157548 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java @@ -0,0 +1,257 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.client.internal.OriginSettingClient; +import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.cluster.ClusterStateListener; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.settings.SecureSetting; +import org.elasticsearch.common.settings.SecureSettings; +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.ingest.EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams; +import org.elasticsearch.ingest.IngestService; +import org.elasticsearch.persistent.AllocatedPersistentTask; +import org.elasticsearch.persistent.PersistentTaskState; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.persistent.PersistentTasksExecutor; +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.io.InputStream; +import java.security.GeneralSecurityException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; +import static org.elasticsearch.ingest.geoip.GeoIpDownloaderTaskExecutor.ENABLED_SETTING; +import static org.elasticsearch.ingest.geoip.GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING; + +public class EnterpriseGeoIpDownloaderTaskExecutor extends PersistentTasksExecutor + implements + ClusterStateListener { + private static final Logger logger = LogManager.getLogger(EnterpriseGeoIpDownloader.class); + + static final String MAXMIND_SETTINGS_PREFIX = "ingest.geoip.downloader.maxmind."; + + public static final Setting MAXMIND_LICENSE_KEY_SETTING = SecureSetting.secureString( + MAXMIND_SETTINGS_PREFIX + "license_key", + null + ); + + private final Client client; + private final HttpClient httpClient; + private final ClusterService clusterService; + private final ThreadPool threadPool; + private final Settings settings; + private volatile TimeValue pollInterval; + private final AtomicReference currentTask = new AtomicReference<>(); + + private volatile SecureSettings cachedSecureSettings; + + EnterpriseGeoIpDownloaderTaskExecutor(Client client, HttpClient httpClient, ClusterService clusterService, ThreadPool threadPool) { + super(ENTERPRISE_GEOIP_DOWNLOADER, threadPool.generic()); + this.client = new OriginSettingClient(client, IngestService.INGEST_ORIGIN); + this.httpClient = httpClient; + this.clusterService = clusterService; + this.threadPool = threadPool; + this.settings = clusterService.getSettings(); + this.pollInterval = POLL_INTERVAL_SETTING.get(settings); + + // do an initial load using the node settings + reload(clusterService.getSettings()); + } + + /** + * This method completes the initialization of the EnterpriseGeoIpDownloaderTaskExecutor by registering several listeners. 
+ */ + public void init() { + clusterService.addListener(this); + clusterService.getClusterSettings().addSettingsUpdateConsumer(POLL_INTERVAL_SETTING, this::setPollInterval); + } + + private void setPollInterval(TimeValue pollInterval) { + if (Objects.equals(this.pollInterval, pollInterval) == false) { + this.pollInterval = pollInterval; + EnterpriseGeoIpDownloader currentDownloader = getCurrentTask(); + if (currentDownloader != null) { + currentDownloader.requestReschedule(); + } + } + } + + private HttpClient.PasswordAuthenticationHolder buildCredentials(final String username) { + final char[] passwordChars; + if (cachedSecureSettings.getSettingNames().contains(MAXMIND_LICENSE_KEY_SETTING.getKey())) { + passwordChars = cachedSecureSettings.getString(MAXMIND_LICENSE_KEY_SETTING.getKey()).getChars(); + } else { + passwordChars = null; + } + + // if the username is missing, empty, or blank, return null as 'no auth' + if (username == null || username.isEmpty() || username.isBlank()) { + return null; + } + + // likewise if the password chars array is missing or empty, return null as 'no auth' + if (passwordChars == null || passwordChars.length == 0) { + return null; + } + + return new HttpClient.PasswordAuthenticationHolder(username, passwordChars); + } + + @Override + protected EnterpriseGeoIpDownloader createTask( + long id, + String type, + String action, + TaskId parentTaskId, + PersistentTasksCustomMetadata.PersistentTask taskInProgress, + Map headers + ) { + return new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + id, + type, + action, + getDescription(taskInProgress), + parentTaskId, + headers, + () -> pollInterval, + this::buildCredentials + ); + } + + @Override + protected void nodeOperation(AllocatedPersistentTask task, EnterpriseGeoIpTaskParams params, PersistentTaskState state) { + EnterpriseGeoIpDownloader downloader = (EnterpriseGeoIpDownloader) task; + EnterpriseGeoIpTaskState geoIpTaskState = (state == null) ? EnterpriseGeoIpTaskState.EMPTY : (EnterpriseGeoIpTaskState) state; + downloader.setState(geoIpTaskState); + currentTask.set(downloader); + if (ENABLED_SETTING.get(clusterService.state().metadata().settings(), settings)) { + downloader.runDownloader(); + } + } + + public EnterpriseGeoIpDownloader getCurrentTask() { + return currentTask.get(); + } + + @Override + public void clusterChanged(ClusterChangedEvent event) { + EnterpriseGeoIpDownloader currentDownloader = getCurrentTask(); + if (currentDownloader != null) { + boolean hasGeoIpMetadataChanges = event.metadataChanged() + && event.changedCustomMetadataSet().contains(IngestGeoIpMetadata.TYPE); + if (hasGeoIpMetadataChanges) { + currentDownloader.requestReschedule(); // watching the cluster changed events to kick the thing off if it's not running + } + } + } + + public synchronized void reload(Settings settings) { + // `SecureSettings` are available here! cache them as they will be needed + // whenever dynamic cluster settings change and we have to rebuild the accounts + try { + this.cachedSecureSettings = extractSecureSettings(settings, List.of(MAXMIND_LICENSE_KEY_SETTING)); + } catch (GeneralSecurityException e) { + // rethrow as a runtime exception, there's logging higher up the call chain around ReloadablePlugin + throw new ElasticsearchException("Exception while reloading enterprise geoip download task executor", e); + } + } + + /** + * Extracts the {@link SecureSettings}` out of the passed in {@link Settings} object. 
The {@code Setting} argument has to have the + * {@code SecureSettings} open/available. Normally {@code SecureSettings} are available only under specific callstacks (eg. during node + * initialization or during a `reload` call). The returned copy can be reused freely as it will never be closed (this is a bit of + * cheating, but it is necessary in this specific circumstance). Only works for secure settings of type string (not file). + * + * @param source A {@code Settings} object with its {@code SecureSettings} open/available. + * @param securePluginSettings The list of settings to copy. + * @return A copy of the {@code SecureSettings} of the passed in {@code Settings} argument. + */ + private static SecureSettings extractSecureSettings(Settings source, List> securePluginSettings) + throws GeneralSecurityException { + // get the secure settings out + final SecureSettings sourceSecureSettings = Settings.builder().put(source, true).getSecureSettings(); + // filter and cache them... + final Map innerMap = new HashMap<>(); + if (sourceSecureSettings != null && securePluginSettings != null) { + for (final String settingKey : sourceSecureSettings.getSettingNames()) { + for (final Setting secureSetting : securePluginSettings) { + if (secureSetting.match(settingKey)) { + innerMap.put( + settingKey, + new SecureSettingValue( + sourceSecureSettings.getString(settingKey), + sourceSecureSettings.getSHA256Digest(settingKey) + ) + ); + } + } + } + } + return new SecureSettings() { + @Override + public boolean isLoaded() { + return true; + } + + @Override + public SecureString getString(String setting) { + return innerMap.get(setting).value(); + } + + @Override + public Set getSettingNames() { + return innerMap.keySet(); + } + + @Override + public InputStream getFile(String setting) { + throw new UnsupportedOperationException("A cached SecureSetting cannot be a file"); + } + + @Override + public byte[] getSHA256Digest(String setting) { + return innerMap.get(setting).sha256Digest(); + } + + @Override + public void close() throws IOException {} + + @Override + public void writeTo(StreamOutput out) throws IOException { + throw new UnsupportedOperationException("A cached SecureSetting cannot be serialized"); + } + }; + } + + /** + * A single-purpose record for the internal implementation of extractSecureSettings + */ + private record SecureSettingValue(SecureString value, byte[] sha256Digest) {} +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskState.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskState.java new file mode 100644 index 0000000000000..57e944ef9b994 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskState.java @@ -0,0 +1,153 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.VersionedNamedWriteable; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.EnterpriseGeoIpTask; +import org.elasticsearch.ingest.geoip.GeoIpTaskState.Metadata; +import org.elasticsearch.persistent.PersistentTaskState; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import static org.elasticsearch.ingest.geoip.GeoIpDownloader.GEOIP_DOWNLOADER; +import static org.elasticsearch.persistent.PersistentTasksCustomMetadata.getTaskWithId; +import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; + +class EnterpriseGeoIpTaskState implements PersistentTaskState, VersionedNamedWriteable { + + private static final ParseField DATABASES = new ParseField("databases"); + + static final EnterpriseGeoIpTaskState EMPTY = new EnterpriseGeoIpTaskState(Map.of()); + + @SuppressWarnings("unchecked") + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + GEOIP_DOWNLOADER, + true, + args -> { + List> databases = (List>) args[0]; + return new EnterpriseGeoIpTaskState(databases.stream().collect(Collectors.toMap(Tuple::v1, Tuple::v2))); + } + ); + + static { + PARSER.declareNamedObjects(constructorArg(), (p, c, name) -> Tuple.tuple(name, Metadata.fromXContent(p)), DATABASES); + } + + public static EnterpriseGeoIpTaskState fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + private final Map databases; + + EnterpriseGeoIpTaskState(Map databases) { + this.databases = Map.copyOf(databases); + } + + EnterpriseGeoIpTaskState(StreamInput input) throws IOException { + databases = input.readImmutableMap( + in -> new Metadata(in.readLong(), in.readVInt(), in.readVInt(), in.readString(), in.readLong(), in.readOptionalString()) + ); + } + + public EnterpriseGeoIpTaskState put(String name, Metadata metadata) { + HashMap newDatabases = new HashMap<>(databases); + newDatabases.put(name, metadata); + return new EnterpriseGeoIpTaskState(newDatabases); + } + + public EnterpriseGeoIpTaskState remove(String name) { + HashMap newDatabases = new HashMap<>(databases); + newDatabases.remove(name); + return new EnterpriseGeoIpTaskState(newDatabases); + } + + public Map getDatabases() { + return databases; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + EnterpriseGeoIpTaskState that = (EnterpriseGeoIpTaskState) o; + return databases.equals(that.databases); + } + + @Override + public int hashCode() { + return Objects.hash(databases); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + { + builder.startObject("databases"); + for (Map.Entry e : 
databases.entrySet()) { + builder.field(e.getKey(), e.getValue()); + } + builder.endObject(); + } + builder.endObject(); + return builder; + } + + @Override + public String getWriteableName() { + return "enterprise-geoip-downloader"; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeMap(databases, (o, v) -> { + o.writeLong(v.lastUpdate()); + o.writeVInt(v.firstChunk()); + o.writeVInt(v.lastChunk()); + o.writeString(v.md5()); + o.writeLong(v.lastCheck()); + o.writeOptionalString(v.sha256()); + }); + } + + /** + * Retrieves the geoip downloader's task state from the cluster state. This may return null in some circumstances, + * for example if the geoip downloader task hasn't been created yet (which it wouldn't be if it's disabled). + * + * @param state the cluster state to read the task state from + * @return the geoip downloader's task state or null if there is not a state to read + */ + @Nullable + static EnterpriseGeoIpTaskState getEnterpriseGeoIpTaskState(ClusterState state) { + PersistentTasksCustomMetadata.PersistentTask task = getTaskWithId(state, EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER); + return (task == null) ? null : (EnterpriseGeoIpTaskState) task.getState(); + } + +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java index 13394a2a0c7cc..ee6f2f16f051b 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.Tuple; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.RangeQueryBuilder; @@ -318,14 +319,15 @@ public void requestReschedule() { } private void cleanDatabases() { - List> expiredDatabases = state.getDatabases() + List> expiredDatabases = state.getDatabases() .entrySet() .stream() - .filter(e -> e.getValue().isValid(clusterService.state().metadata().settings()) == false) + .filter(e -> e.getValue().isNewEnough(clusterService.state().metadata().settings()) == false) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) .toList(); expiredDatabases.forEach(e -> { - String name = e.getKey(); - Metadata meta = e.getValue(); + String name = e.v1(); + Metadata meta = e.v2(); deleteOldChunks(name, meta.lastChunk() + 1); state = state.put(name, new Metadata(meta.lastUpdate(), meta.firstChunk(), meta.lastChunk(), meta.md5(), meta.lastCheck() - 1)); updateTaskState(); diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java index 09ac488f96e2d..3f89bb1dd5c50 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java @@ -217,7 +217,7 @@ public void clusterChanged(ClusterChangedEvent event) { } boolean hasIndicesChanges = 
event.previousState().metadata().indices().equals(event.state().metadata().indices()) == false; - boolean hasIngestPipelineChanges = event.changedCustomMetadataSet().contains(IngestMetadata.TYPE); + boolean hasIngestPipelineChanges = event.metadataChanged() && event.changedCustomMetadataSet().contains(IngestMetadata.TYPE); if (hasIngestPipelineChanges || hasIndicesChanges) { boolean newAtLeastOneGeoipProcessor = hasAtLeastOneGeoipProcessor(event.state()); diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java index a405d90b24dcc..45607297e872d 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java @@ -42,6 +42,11 @@ class GeoIpTaskState implements PersistentTaskState, VersionedNamedWriteable { + // for clarity inside this file, it's useful to have an alias that reads like what we're using it for + // rather than what the version is -- previously this was two separate conceptual versions, but it's not + // especially useful to make that distinction in the TransportVersions class itself + private static final TransportVersion INCLUDE_SHA256 = TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER; + private static final ParseField DATABASES = new ParseField("databases"); static final GeoIpTaskState EMPTY = new GeoIpTaskState(Map.of()); @@ -71,7 +76,16 @@ public static GeoIpTaskState fromXContent(XContentParser parser) throws IOExcept } GeoIpTaskState(StreamInput input) throws IOException { - databases = input.readImmutableMap(in -> new Metadata(in.readLong(), in.readVInt(), in.readVInt(), in.readString(), in.readLong())); + databases = input.readImmutableMap( + in -> new Metadata( + in.readLong(), + in.readVInt(), + in.readVInt(), + in.readString(), + in.readLong(), + in.getTransportVersion().onOrAfter(INCLUDE_SHA256) ? input.readOptionalString() : null + ) + ); } public GeoIpTaskState put(String name, Metadata metadata) { @@ -129,16 +143,21 @@ public void writeTo(StreamOutput out) throws IOException { o.writeVInt(v.lastChunk); o.writeString(v.md5); o.writeLong(v.lastCheck); + if (o.getTransportVersion().onOrAfter(INCLUDE_SHA256)) { + o.writeOptionalString(v.sha256); + } }); } - record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long lastCheck) implements ToXContentObject { + record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long lastCheck, @Nullable String sha256) + implements + ToXContentObject { /** * An empty Metadata object useful for getOrDefault -type calls. Crucially, the 'lastChunk' is -1, so it's safe to use * with logic that says the new firstChunk is the old lastChunk + 1. 
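A two-line illustration (not part of the patch, and assuming same-package access) of why that -1 sentinel is convenient:

    GeoIpTaskState.Metadata metadata = GeoIpTaskState.Metadata.EMPTY; // lastChunk() == -1 for a never-downloaded database
    int firstChunk = metadata.lastChunk() + 1;                        // -1 + 1 == 0, so indexing starts at chunk 0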
*/ - static Metadata EMPTY = new Metadata(-1, -1, -1, "", -1); + static Metadata EMPTY = new Metadata(-1, -1, -1, "", -1, null); private static final String NAME = GEOIP_DOWNLOADER + "-metadata"; private static final ParseField LAST_CHECK = new ParseField("last_check"); @@ -146,6 +165,7 @@ record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long private static final ParseField FIRST_CHUNK = new ParseField("first_chunk"); private static final ParseField LAST_CHUNK = new ParseField("last_chunk"); private static final ParseField MD5 = new ParseField("md5"); + private static final ParseField SHA256 = new ParseField("sha256"); private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( NAME, @@ -155,7 +175,8 @@ record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long (int) args[1], (int) args[2], (String) args[3], - (long) (args[4] == null ? args[0] : args[4]) + (long) (args[4] == null ? args[0] : args[4]), + (String) args[5] ) ); @@ -165,6 +186,7 @@ record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long PARSER.declareInt(constructorArg(), LAST_CHUNK); PARSER.declareString(constructorArg(), MD5); PARSER.declareLong(optionalConstructorArg(), LAST_CHECK); + PARSER.declareString(optionalConstructorArg(), SHA256); } public static Metadata fromXContent(XContentParser parser) { @@ -179,11 +201,15 @@ public static Metadata fromXContent(XContentParser parser) { Objects.requireNonNull(md5); } + Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long lastCheck) { + this(lastUpdate, firstChunk, lastChunk, md5, lastCheck, null); + } + public boolean isCloseToExpiration() { return Instant.ofEpochMilli(lastCheck).isBefore(Instant.now().minus(25, ChronoUnit.DAYS)); } - public boolean isValid(Settings settings) { + public boolean isNewEnough(Settings settings) { TimeValue valid = settings.getAsTime("ingest.geoip.database_validity", TimeValue.timeValueDays(30)); return Instant.ofEpochMilli(lastCheck).isAfter(Instant.now().minus(valid.getMillis(), ChronoUnit.MILLIS)); } @@ -197,6 +223,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(FIRST_CHUNK.getPreferredName(), firstChunk); builder.field(LAST_CHUNK.getPreferredName(), lastChunk); builder.field(MD5.getPreferredName(), md5); + if (sha256 != null) { // only serialize if not null, for prettiness reasons + builder.field(SHA256.getPreferredName(), sha256); + } } builder.endObject(); return builder; diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java index 8efc4dc2e74bd..2f6bd6ef20fd0 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java @@ -24,6 +24,7 @@ import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; +import java.util.Arrays; import java.util.Objects; import static java.net.HttpURLConnection.HTTP_MOVED_PERM; @@ -34,6 +35,31 @@ class HttpClient { + /** + * A PasswordAuthenticationHolder is just a wrapper around a PasswordAuthentication to implement AutoCloseable. 
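A usage sketch for the holder defined just below, mirroring how the enterprise downloader consumes credentials earlier in this patch; the account id and license key literals are placeholders, and the snippet assumes same-package access:

    char[] licenseKey = "hypothetical-license-key".toCharArray();
    try (HttpClient.PasswordAuthenticationHolder holder = new HttpClient.PasswordAuthenticationHolder("123456", licenseKey)) {
        PasswordAuthentication auth = holder.get();
        // ... hand auth to the HttpClient download methods ...
    }
    // on close() the holder's copy of the password chars is zeroed out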
+ * This construction makes it possible to use a PasswordAuthentication in a try-with-resources statement, which + * makes it easier to ensure cleanup of the PasswordAuthentication is performed after it's finished being used. + */ + static final class PasswordAuthenticationHolder implements AutoCloseable { + private PasswordAuthentication auth; + + PasswordAuthenticationHolder(String username, char[] passwordChars) { + this.auth = new PasswordAuthentication(username, passwordChars); // clones the passed-in chars + } + + public PasswordAuthentication get() { + Objects.requireNonNull(auth); + return auth; + } + + @Override + public void close() { + final PasswordAuthentication clear = this.auth; + this.auth = null; // set to null and then clear it + Arrays.fill(clear.getPassword(), '\0'); // zero out the password chars + } + } + // a private sentinel value for representing the idea that there's no auth for some request. // this allows us to have a not-null requirement on the methods that do accept an auth. // if you don't want auth, then don't use those methods. ;) diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadata.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadata.java new file mode 100644 index 0000000000000..f5ac755b6b980 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadata.java @@ -0,0 +1,157 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.cluster.Diff; +import org.elasticsearch.cluster.DiffableUtils; +import org.elasticsearch.cluster.NamedDiff; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Holds the ingest-geoip databases that are available in the cluster state. 
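As a usage sketch, the enterprise downloader reads this custom metadata straight out of the cluster state (the same call appears in updateDatabases earlier in this patch):

    ClusterState clusterState = clusterService.state();
    IngestGeoIpMetadata geoIpMeta = clusterState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY);
    for (Map.Entry<String, DatabaseConfigurationMetadata> entry : geoIpMeta.getDatabases().entrySet()) {
        String id = entry.getKey();                                   // e.g. "my_db_config_1"
        DatabaseConfiguration database = entry.getValue().database(); // the database name plus the MaxMind account id
    }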
+ */ +public final class IngestGeoIpMetadata implements Metadata.Custom { + + public static final String TYPE = "ingest_geoip"; + private static final ParseField DATABASES_FIELD = new ParseField("databases"); + + public static final IngestGeoIpMetadata EMPTY = new IngestGeoIpMetadata(Map.of()); + + @SuppressWarnings("unchecked") + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "ingest_geoip_metadata", + a -> new IngestGeoIpMetadata( + ((List) a[0]).stream().collect(Collectors.toMap((m) -> m.database().id(), Function.identity())) + ) + ); + static { + PARSER.declareNamedObjects(ConstructingObjectParser.constructorArg(), (p, c, n) -> DatabaseConfigurationMetadata.parse(p, n), v -> { + throw new IllegalArgumentException("ordered " + DATABASES_FIELD.getPreferredName() + " are not supported"); + }, DATABASES_FIELD); + } + + private final Map databases; + + public IngestGeoIpMetadata(Map databases) { + this.databases = Map.copyOf(databases); + } + + @Override + public String getWriteableName() { + return TYPE; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER; + } + + public Map getDatabases() { + return databases; + } + + public IngestGeoIpMetadata(StreamInput in) throws IOException { + this.databases = in.readMap(StreamInput::readString, DatabaseConfigurationMetadata::new); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeMap(databases, StreamOutput::writeWriteable); + } + + public static IngestGeoIpMetadata fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + @Override + public Iterator toXContentChunked(ToXContent.Params ignored) { + return Iterators.concat(ChunkedToXContentHelper.xContentValuesMap(DATABASES_FIELD.getPreferredName(), databases)); + } + + @Override + public EnumSet context() { + return Metadata.ALL_CONTEXTS; + } + + @Override + public Diff diff(Metadata.Custom before) { + return new GeoIpMetadataDiff((IngestGeoIpMetadata) before, this); + } + + static class GeoIpMetadataDiff implements NamedDiff { + + final Diff> databases; + + GeoIpMetadataDiff(IngestGeoIpMetadata before, IngestGeoIpMetadata after) { + this.databases = DiffableUtils.diff(before.databases, after.databases, DiffableUtils.getStringKeySerializer()); + } + + GeoIpMetadataDiff(StreamInput in) throws IOException { + databases = DiffableUtils.readJdkMapDiff( + in, + DiffableUtils.getStringKeySerializer(), + DatabaseConfigurationMetadata::new, + DatabaseConfigurationMetadata::readDiffFrom + ); + } + + @Override + public Metadata.Custom apply(Metadata.Custom part) { + return new IngestGeoIpMetadata(databases.apply(((IngestGeoIpMetadata) part).databases)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + databases.writeTo(out); + } + + @Override + public String getWriteableName() { + return TYPE; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + IngestGeoIpMetadata that = (IngestGeoIpMetadata) o; + return Objects.equals(databases, that.databases); + } + + @Override + public int hashCode() { + return Objects.hash(databases); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java 
b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java index 9d0f9848d97b6..e606688ad60a0 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java @@ -12,8 +12,10 @@ import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.NamedDiff; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; @@ -25,8 +27,18 @@ import org.elasticsearch.common.settings.SettingsModule; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.indices.SystemIndexDescriptor; +import org.elasticsearch.ingest.EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams; import org.elasticsearch.ingest.IngestService; import org.elasticsearch.ingest.Processor; +import org.elasticsearch.ingest.geoip.direct.DeleteDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.GetDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.RestDeleteDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.RestGetDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.RestPutDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.TransportDeleteDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.TransportGetDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.TransportPutDatabaseConfigurationAction; import org.elasticsearch.ingest.geoip.stats.GeoIpDownloaderStats; import org.elasticsearch.ingest.geoip.stats.GeoIpStatsAction; import org.elasticsearch.ingest.geoip.stats.GeoIpStatsTransportAction; @@ -38,6 +50,7 @@ import org.elasticsearch.plugins.IngestPlugin; import org.elasticsearch.plugins.PersistentTaskPlugin; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.ReloadablePlugin; import org.elasticsearch.plugins.SystemIndexPlugin; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; @@ -57,13 +70,21 @@ import java.util.function.Supplier; import static org.elasticsearch.index.mapper.MapperService.SINGLE_MAPPING_NAME; +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; import static org.elasticsearch.ingest.IngestService.INGEST_ORIGIN; import static org.elasticsearch.ingest.geoip.GeoIpDownloader.DATABASES_INDEX; import static org.elasticsearch.ingest.geoip.GeoIpDownloader.DATABASES_INDEX_PATTERN; import static org.elasticsearch.ingest.geoip.GeoIpDownloader.GEOIP_DOWNLOADER; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; -public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, SystemIndexPlugin, Closeable, PersistentTaskPlugin, ActionPlugin { +public class IngestGeoIpPlugin extends Plugin + implements + IngestPlugin, + SystemIndexPlugin, + Closeable, + PersistentTaskPlugin, + ActionPlugin, + ReloadablePlugin { public static final Setting CACHE_SIZE = Setting.longSetting("ingest.geoip.cache_size", 1000, 0, 
Setting.Property.NodeScope); private static final int GEOIP_INDEX_MAPPINGS_VERSION = 1; /** @@ -78,6 +99,7 @@ public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, SystemInd private final SetOnce ingestService = new SetOnce<>(); private final SetOnce databaseRegistry = new SetOnce<>(); private GeoIpDownloaderTaskExecutor geoIpDownloaderTaskExecutor; + private EnterpriseGeoIpDownloaderTaskExecutor enterpriseGeoIpDownloaderTaskExecutor; @Override public List> getSettings() { @@ -86,7 +108,8 @@ public List> getSettings() { GeoIpDownloaderTaskExecutor.EAGER_DOWNLOAD_SETTING, GeoIpDownloaderTaskExecutor.ENABLED_SETTING, GeoIpDownloader.ENDPOINT_SETTING, - GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING + GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING, + EnterpriseGeoIpDownloaderTaskExecutor.MAXMIND_LICENSE_KEY_SETTING ); } @@ -123,7 +146,16 @@ public Collection createComponents(PluginServices services) { services.threadPool() ); geoIpDownloaderTaskExecutor.init(); - return List.of(databaseRegistry.get(), geoIpDownloaderTaskExecutor); + + enterpriseGeoIpDownloaderTaskExecutor = new EnterpriseGeoIpDownloaderTaskExecutor( + services.client(), + new HttpClient(), + services.clusterService(), + services.threadPool() + ); + enterpriseGeoIpDownloaderTaskExecutor.init(); + + return List.of(databaseRegistry.get(), geoIpDownloaderTaskExecutor, enterpriseGeoIpDownloaderTaskExecutor); } @Override @@ -139,12 +171,17 @@ public List> getPersistentTasksExecutor( SettingsModule settingsModule, IndexNameExpressionResolver expressionResolver ) { - return List.of(geoIpDownloaderTaskExecutor); + return List.of(geoIpDownloaderTaskExecutor, enterpriseGeoIpDownloaderTaskExecutor); } @Override public List> getActions() { - return List.of(new ActionHandler<>(GeoIpStatsAction.INSTANCE, GeoIpStatsTransportAction.class)); + return List.of( + new ActionHandler<>(GeoIpStatsAction.INSTANCE, GeoIpStatsTransportAction.class), + new ActionHandler<>(GetDatabaseConfigurationAction.INSTANCE, TransportGetDatabaseConfigurationAction.class), + new ActionHandler<>(DeleteDatabaseConfigurationAction.INSTANCE, TransportDeleteDatabaseConfigurationAction.class), + new ActionHandler<>(PutDatabaseConfigurationAction.INSTANCE, TransportPutDatabaseConfigurationAction.class) + ); } @Override @@ -159,22 +196,41 @@ public List getRestHandlers( Supplier nodesInCluster, Predicate clusterSupportsFeature ) { - return List.of(new RestGeoIpStatsAction()); + return List.of( + new RestGeoIpStatsAction(), + new RestGetDatabaseConfigurationAction(), + new RestDeleteDatabaseConfigurationAction(), + new RestPutDatabaseConfigurationAction() + ); } @Override public List getNamedXContent() { return List.of( new NamedXContentRegistry.Entry(PersistentTaskParams.class, new ParseField(GEOIP_DOWNLOADER), GeoIpTaskParams::fromXContent), - new NamedXContentRegistry.Entry(PersistentTaskState.class, new ParseField(GEOIP_DOWNLOADER), GeoIpTaskState::fromXContent) + new NamedXContentRegistry.Entry(PersistentTaskState.class, new ParseField(GEOIP_DOWNLOADER), GeoIpTaskState::fromXContent), + new NamedXContentRegistry.Entry( + PersistentTaskParams.class, + new ParseField(ENTERPRISE_GEOIP_DOWNLOADER), + EnterpriseGeoIpTaskParams::fromXContent + ), + new NamedXContentRegistry.Entry( + PersistentTaskState.class, + new ParseField(ENTERPRISE_GEOIP_DOWNLOADER), + EnterpriseGeoIpTaskState::fromXContent + ) ); } @Override public List getNamedWriteables() { return List.of( + new NamedWriteableRegistry.Entry(Metadata.Custom.class, IngestGeoIpMetadata.TYPE, 
IngestGeoIpMetadata::new), + new NamedWriteableRegistry.Entry(NamedDiff.class, IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.GeoIpMetadataDiff::new), new NamedWriteableRegistry.Entry(PersistentTaskState.class, GEOIP_DOWNLOADER, GeoIpTaskState::new), new NamedWriteableRegistry.Entry(PersistentTaskParams.class, GEOIP_DOWNLOADER, GeoIpTaskParams::new), + new NamedWriteableRegistry.Entry(PersistentTaskState.class, ENTERPRISE_GEOIP_DOWNLOADER, EnterpriseGeoIpTaskState::new), + new NamedWriteableRegistry.Entry(PersistentTaskParams.class, ENTERPRISE_GEOIP_DOWNLOADER, EnterpriseGeoIpTaskParams::new), new NamedWriteableRegistry.Entry(Task.Status.class, GEOIP_DOWNLOADER, GeoIpDownloaderStats::new) ); } @@ -235,4 +291,9 @@ private static XContentBuilder mappings() { throw new UncheckedIOException("Failed to build mappings for " + DATABASES_INDEX, e); } } + + @Override + public void reload(Settings settings) { + enterpriseGeoIpDownloaderTaskExecutor.reload(settings); + } } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java new file mode 100644 index 0000000000000..0a43d7a2d830b --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java @@ -0,0 +1,209 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.cluster.metadata.MetadataCreateIndexService; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Objects; +import java.util.Set; +import java.util.regex.Pattern; + +/** + * A database configuration is an identified (has an id) configuration of a named geoip location database to download, + * and the identifying information/configuration to download the named database from some database provider. + *

      + * That is, it has an id e.g. "my_db_config_1" and it says "download the file named XXXX from SomeCompany, and here's the + * magic token to use to do that." + */ +public record DatabaseConfiguration(String id, String name, Maxmind maxmind) implements Writeable, ToXContentObject { + + // id is a user selected signifier like 'my_domain_db' + // name is the name of a file that can be downloaded (like 'GeoIP2-Domain') + + // a configuration will have a 'type' like "maxmind", and that might have some more details, + // for now, though the important thing is that the json has to have it even though we don't model it meaningfully in this class + + public DatabaseConfiguration { + // these are invariants, not actual validation + Objects.requireNonNull(id); + Objects.requireNonNull(name); + Objects.requireNonNull(maxmind); + } + + /** + * An alphanumeric, followed by 0-126 alphanumerics, dashes, or underscores. That is, 1-127 alphanumerics, dashes, or underscores, + * but a leading dash or underscore isn't allowed (we're reserving leading dashes and underscores [and other odd characters] for + * Elastic and the future). + */ + private static final Pattern ID_PATTERN = Pattern.compile("\\p{Alnum}[_\\-\\p{Alnum}]{0,126}"); + + public static final Set MAXMIND_NAMES = Set.of( + "GeoIP2-Anonymous-IP", + "GeoIP2-City", + "GeoIP2-Connection-Type", + "GeoIP2-Country", + "GeoIP2-Domain", + "GeoIP2-Enterprise", + "GeoIP2-ISP" + + // in order to prevent a conflict between the (ordinary) geoip downloader and the enterprise geoip downloader, + // the enterprise geoip downloader is limited only to downloading the commercial files that the (ordinary) geoip downloader + // doesn't support out of the box -- in the future if we would like to relax this constraint, then we'll need to resolve that + // conflict at the same time. + + // "GeoLite2-ASN", + // "GeoLite2-City", + // "GeoLite2-Country" + ); + + private static final ParseField NAME = new ParseField("name"); + private static final ParseField MAXMIND = new ParseField("maxmind"); + + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "database", + false, + (a, id) -> { + String name = (String) a[0]; + Maxmind maxmind = (Maxmind) a[1]; + return new DatabaseConfiguration(id, name, maxmind); + } + ); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME); + PARSER.declareObject(ConstructingObjectParser.constructorArg(), (parser, id) -> Maxmind.PARSER.apply(parser, null), MAXMIND); + } + + public DatabaseConfiguration(StreamInput in) throws IOException { + this(in.readString(), in.readString(), new Maxmind(in)); + } + + public static DatabaseConfiguration parse(XContentParser parser, String id) { + return PARSER.apply(parser, id); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(id); + out.writeString(name); + maxmind.writeTo(out); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("name", name); + builder.field("maxmind", maxmind); + builder.endObject(); + return builder; + } + + /** + * An id is intended to be alphanumerics, dashes, and underscores (only), but we're reserving leading dashes and underscores for + * ourselves in the future, that is, they're not for the ones that users can PUT. 
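A sketch of constructing one of these configurations programmatically with the records defined here; the id and account id are made-up values, and the validation helpers referenced below are defined in the lines that follow:

    DatabaseConfiguration.Maxmind maxmind = new DatabaseConfiguration.Maxmind("123456"); // hypothetical account id
    DatabaseConfiguration config = new DatabaseConfiguration("my_city_db", "GeoIP2-City", maxmind);

    DatabaseConfiguration.validateId("my_city_db");                  // throws IllegalArgumentException for a malformed id
    ActionRequestValidationException validation = config.validate(); // null when the id and name pass the checks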
+ */ + static void validateId(String id) throws IllegalArgumentException { + if (Strings.isNullOrEmpty(id)) { + throw new IllegalArgumentException("invalid database configuration id [" + id + "]: must not be null or empty"); + } + MetadataCreateIndexService.validateIndexOrAliasName( + id, + (id1, description) -> new IllegalArgumentException("invalid database configuration id [" + id1 + "]: " + description) + ); + int byteCount = id.getBytes(StandardCharsets.UTF_8).length; + if (byteCount > 127) { + throw new IllegalArgumentException( + "invalid database configuration id [" + id + "]: id is too long, (" + byteCount + " > " + 127 + ")" + ); + } + if (ID_PATTERN.matcher(id).matches() == false) { + throw new IllegalArgumentException( + "invalid database configuration id [" + + id + + "]: id doesn't match required rules (alphanumerics, dashes, and underscores, only)" + ); + } + } + + public ActionRequestValidationException validate() { + ActionRequestValidationException err = new ActionRequestValidationException(); + + // how do we cross the id validation divide here? or do we? it seems unfortunate to not invoke it at all. + + // name validation + if (Strings.hasText(name) == false) { + err.addValidationError("invalid name [" + name + "]: cannot be empty"); + } + + if (MAXMIND_NAMES.contains(name) == false) { + err.addValidationError("invalid name [" + name + "]: must be a supported name ([" + MAXMIND_NAMES + "])"); + } + + // important: the name must be unique across all configurations of this same type, + // but we validate that in the cluster state update, not here. + try { + validateId(id); + } catch (IllegalArgumentException e) { + err.addValidationError(e.getMessage()); + } + return err.validationErrors().isEmpty() ? null : err; + } + + public record Maxmind(String accountId) implements Writeable, ToXContentObject { + + public Maxmind { + // this is an invariant, not actual validation + Objects.requireNonNull(accountId); + } + + private static final ParseField ACCOUNT_ID = new ParseField("account_id"); + + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("database", false, (a, id) -> { + String accountId = (String) a[0]; + return new Maxmind(accountId); + }); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), ACCOUNT_ID); + } + + public Maxmind(StreamInput in) throws IOException { + this(in.readString()); + } + + public static Maxmind parse(XContentParser parser) { + return PARSER.apply(parser, null); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(accountId); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("account_id", accountId); + builder.endObject(); + return builder; + } + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadata.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadata.java new file mode 100644 index 0000000000000..574f97e4c5e64 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadata.java @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.cluster.Diff; +import org.elasticsearch.cluster.SimpleDiffable; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; + +/** + * {@code DatabaseConfigurationMetadata} encapsulates a {@link DatabaseConfiguration} as well as + * the additional meta information like version (a monotonically incrementing number), and last modified date. + */ +public record DatabaseConfigurationMetadata(DatabaseConfiguration database, long version, long modifiedDate) + implements + SimpleDiffable, + ToXContentObject { + + public static final ParseField DATABASE = new ParseField("database"); + public static final ParseField VERSION = new ParseField("version"); + public static final ParseField MODIFIED_DATE_MILLIS = new ParseField("modified_date_millis"); + public static final ParseField MODIFIED_DATE = new ParseField("modified_date"); + // later, things like this: + // static final ParseField LAST_SUCCESS = new ParseField("last_success"); + // static final ParseField LAST_FAILURE = new ParseField("last_failure"); + + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "database_metadata", + true, + a -> { + DatabaseConfiguration database = (DatabaseConfiguration) a[0]; + return new DatabaseConfigurationMetadata(database, (long) a[1], (long) a[2]); + } + ); + static { + PARSER.declareObject(ConstructingObjectParser.constructorArg(), DatabaseConfiguration::parse, DATABASE); + PARSER.declareLong(ConstructingObjectParser.constructorArg(), VERSION); + PARSER.declareLong(ConstructingObjectParser.constructorArg(), MODIFIED_DATE_MILLIS); + } + + public static DatabaseConfigurationMetadata parse(XContentParser parser, String name) { + return PARSER.apply(parser, name); + } + + public DatabaseConfigurationMetadata(StreamInput in) throws IOException { + this(new DatabaseConfiguration(in), in.readVLong(), in.readVLong()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + // this is cluster state serialization, the id is implicit and doesn't need to included here + // (we'll be a in a json map where the id is the key) + builder.startObject(); + builder.field(VERSION.getPreferredName(), version); + builder.timeField(MODIFIED_DATE_MILLIS.getPreferredName(), MODIFIED_DATE.getPreferredName(), modifiedDate); + builder.field(DATABASE.getPreferredName(), database); + builder.endObject(); + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + database.writeTo(out); + out.writeVLong(version); + out.writeVLong(modifiedDate); + } + + public static Diff readDiffFrom(StreamInput in) throws IOException { + return SimpleDiffable.readDiffFrom(DatabaseConfigurationMetadata::new, in); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DeleteDatabaseConfigurationAction.java 
b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DeleteDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..843cc986c47e7 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DeleteDatabaseConfigurationAction.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.TimeValue; + +import java.io.IOException; +import java.util.Objects; + +public class DeleteDatabaseConfigurationAction extends ActionType { + public static final DeleteDatabaseConfigurationAction INSTANCE = new DeleteDatabaseConfigurationAction(); + public static final String NAME = "cluster:admin/ingest/geoip/database/delete"; + + protected DeleteDatabaseConfigurationAction() { + super(NAME); + } + + public static class Request extends AcknowledgedRequest { + + private final String databaseId; + + public Request(StreamInput in) throws IOException { + super(in); + databaseId = in.readString(); + } + + public Request(TimeValue masterNodeTimeout, TimeValue ackTimeout, String databaseId) { + super(masterNodeTimeout, ackTimeout); + this.databaseId = Objects.requireNonNull(databaseId, "id may not be null"); + } + + public String getDatabaseId() { + return this.databaseId; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(databaseId); + } + + @Override + public int hashCode() { + return databaseId.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Request other = (Request) obj; + return Objects.equals(databaseId, other.databaseId); + } + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..546c0c2df821d --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java @@ -0,0 +1,142 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.DATABASE; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.MODIFIED_DATE; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.MODIFIED_DATE_MILLIS; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.VERSION; + +public class GetDatabaseConfigurationAction extends ActionType { + public static final GetDatabaseConfigurationAction INSTANCE = new GetDatabaseConfigurationAction(); + public static final String NAME = "cluster:admin/ingest/geoip/database/get"; + + protected GetDatabaseConfigurationAction() { + super(NAME); + } + + public static class Request extends AcknowledgedRequest { + + private final String[] databaseIds; + + public Request(TimeValue masterNodeTimeout, TimeValue ackTimeout, String... databaseIds) { + super(masterNodeTimeout, ackTimeout); + this.databaseIds = Objects.requireNonNull(databaseIds, "ids may not be null"); + } + + public Request(StreamInput in) throws IOException { + super(in); + databaseIds = in.readStringArray(); + } + + public String[] getDatabaseIds() { + return this.databaseIds; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeStringArray(databaseIds); + } + + @Override + public int hashCode() { + return Arrays.hashCode(databaseIds); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Request other = (Request) obj; + return Arrays.equals(databaseIds, other.databaseIds); + } + } + + public static class Response extends ActionResponse implements ToXContentObject { + + private final List databases; + + public Response(List databases) { + this.databases = List.copyOf(databases); // defensive copy + } + + public Response(StreamInput in) throws IOException { + this(in.readCollectionAsList(DatabaseConfigurationMetadata::new)); + } + + public List getDatabases() { + return this.databases; + } + + @Override + public String toString() { + return Strings.toString(this); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startArray("databases"); + for (DatabaseConfigurationMetadata item : databases) { + DatabaseConfiguration database = item.database(); + builder.startObject(); + builder.field("id", database.id()); // serialize including the id -- this is get response serialization + builder.field(VERSION.getPreferredName(), item.version()); + builder.timeField(MODIFIED_DATE_MILLIS.getPreferredName(), MODIFIED_DATE.getPreferredName(), item.modifiedDate()); + builder.field(DATABASE.getPreferredName(), database); + builder.endObject(); + } + builder.endArray(); + builder.endObject(); + return builder; + } + + 
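+        // Illustrative only, not part of the original change: given the builder calls above, a GET response
+        // carrying a single configuration would render roughly as
+        //   {"databases": [{"id": "my-maxmind-db", "version": 3, "modified_date_millis": 1721174400000, "database": {...}}]}
+        // where the id, version, and date values are hypothetical and the shape of the nested "database"
+        // object is whatever DatabaseConfiguration.toXContent emits.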
@Override + public void writeTo(StreamOutput out) throws IOException { + out.writeCollection(databases); + } + + @Override + public int hashCode() { + return Objects.hash(databases); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Response other = (Response) obj; + return databases.equals(other.databases); + } + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..7bd5e1fa5cc68 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Objects; + +public class PutDatabaseConfigurationAction extends ActionType { + public static final PutDatabaseConfigurationAction INSTANCE = new PutDatabaseConfigurationAction(); + public static final String NAME = "cluster:admin/ingest/geoip/database/put"; + + protected PutDatabaseConfigurationAction() { + super(NAME); + } + + public static class Request extends AcknowledgedRequest { + + private final DatabaseConfiguration database; + + public Request(TimeValue masterNodeTimeout, TimeValue ackTimeout, DatabaseConfiguration database) { + super(masterNodeTimeout, ackTimeout); + this.database = database; + } + + public Request(StreamInput in) throws IOException { + super(in); + database = new DatabaseConfiguration(in); + } + + public DatabaseConfiguration getDatabase() { + return this.database; + } + + public static Request parseRequest(TimeValue masterNodeTimeout, TimeValue ackTimeout, String id, XContentParser parser) { + return new Request(masterNodeTimeout, ackTimeout, DatabaseConfiguration.parse(parser, id)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + database.writeTo(out); + } + + @Override + public ActionRequestValidationException validate() { + return database.validate(); + } + + @Override + public int hashCode() { + return Objects.hash(database); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Request other = (Request) obj; + return database.equals(other.database); + } + + @Override + public String toString() { + return Strings.toString((b, p) -> b.field(database.id(), database)); + } + } +} diff --git 
a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestDeleteDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestDeleteDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..4dc263224ad0a --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestDeleteDatabaseConfigurationAction.java @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestToXContentListener; + +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.DELETE; +import static org.elasticsearch.rest.RestUtils.getAckTimeout; +import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout; + +@ServerlessScope(Scope.INTERNAL) +public class RestDeleteDatabaseConfigurationAction extends BaseRestHandler { + + @Override + public List routes() { + return List.of(new Route(DELETE, "/_ingest/geoip/database/{id}")); + } + + @Override + public String getName() { + return "geoip_delete_database_configuration"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) { + final var req = new DeleteDatabaseConfigurationAction.Request( + getMasterNodeTimeout(request), + getAckTimeout(request), + request.param("id") + ); + return channel -> client.execute(DeleteDatabaseConfigurationAction.INSTANCE, req, new RestToXContentListener<>(channel)); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..b237ceb638918 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.common.Strings; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestToXContentListener; + +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.GET; +import static org.elasticsearch.rest.RestUtils.getAckTimeout; +import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout; + +@ServerlessScope(Scope.INTERNAL) +public class RestGetDatabaseConfigurationAction extends BaseRestHandler { + + @Override + public List routes() { + return List.of(new Route(GET, "/_ingest/geoip/database"), new Route(GET, "/_ingest/geoip/database/{id}")); + } + + @Override + public String getName() { + return "geoip_get_database_configuration"; + } + + @Override + protected RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) { + final var req = new GetDatabaseConfigurationAction.Request( + getMasterNodeTimeout(request), + getAckTimeout(request), + Strings.splitStringByCommaToArray(request.param("id")) + ); + return channel -> client.execute(GetDatabaseConfigurationAction.INSTANCE, req, new RestToXContentListener<>(channel)); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestPutDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestPutDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..62b01b930d5cd --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestPutDatabaseConfigurationAction.java @@ -0,0 +1,52 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction.Request; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestToXContentListener; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.PUT; +import static org.elasticsearch.rest.RestUtils.getAckTimeout; +import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout; + +@ServerlessScope(Scope.INTERNAL) +public class RestPutDatabaseConfigurationAction extends BaseRestHandler { + + @Override + public List routes() { + return List.of(new Route(PUT, "/_ingest/geoip/database/{id}")); + } + + @Override + public String getName() { + return "geoip_put_database_configuration"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { + final Request req; + try (var parser = request.contentParser()) { + req = PutDatabaseConfigurationAction.Request.parseRequest( + getMasterNodeTimeout(request), + getAckTimeout(request), + request.param("id"), + parser + ); + } + return channel -> client.execute(PutDatabaseConfigurationAction.INSTANCE, req, new RestToXContentListener<>(channel)); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..43aacee956279 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java @@ -0,0 +1,128 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateTaskListener; +import org.elasticsearch.cluster.SimpleBatchedExecutor; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.service.MasterServiceTaskQueue; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.core.Strings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.ingest.geoip.direct.DeleteDatabaseConfigurationAction.Request; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.util.HashMap; +import java.util.Map; + +public class TransportDeleteDatabaseConfigurationAction extends TransportMasterNodeAction { + + private static final Logger logger = LogManager.getLogger(TransportDeleteDatabaseConfigurationAction.class); + + private static final SimpleBatchedExecutor DELETE_TASK_EXECUTOR = new SimpleBatchedExecutor<>() { + @Override + public Tuple executeTask(DeleteDatabaseConfigurationTask task, ClusterState clusterState) throws Exception { + return Tuple.tuple(task.execute(clusterState), null); + } + + @Override + public void taskSucceeded(DeleteDatabaseConfigurationTask task, Void unused) { + logger.trace("Updated cluster state for deletion of database configuration [{}]", task.databaseId); + task.listener.onResponse(AcknowledgedResponse.TRUE); + } + }; + + private final MasterServiceTaskQueue deleteDatabaseConfigurationTaskQueue; + + @Inject + public TransportDeleteDatabaseConfigurationAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + DeleteDatabaseConfigurationAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + Request::new, + indexNameExpressionResolver, + AcknowledgedResponse::readFrom, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + this.deleteDatabaseConfigurationTaskQueue = clusterService.createTaskQueue( + "delete-geoip-database-configuration-state-update", + Priority.NORMAL, + DELETE_TASK_EXECUTOR + ); + } + + @Override + protected void masterOperation(Task task, Request request, ClusterState state, ActionListener listener) + throws Exception { + final String id = request.getDatabaseId(); + final IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + if (geoIpMeta.getDatabases().containsKey(id) == false) { + throw new ResourceNotFoundException("Database configuration not found: {}", id); + } + 
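+        // the removal itself is applied below through a MasterServiceTaskQueue backed by a SimpleBatchedExecutor,
+        // so concurrent delete requests are batched into as few cluster-state updates as possible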
deleteDatabaseConfigurationTaskQueue.submitTask( + Strings.format("delete-geoip-database-configuration-[%s]", id), + new DeleteDatabaseConfigurationTask(listener, id), + null + ); + } + + private record DeleteDatabaseConfigurationTask(ActionListener listener, String databaseId) + implements + ClusterStateTaskListener { + + ClusterState execute(ClusterState currentState) throws Exception { + final IngestGeoIpMetadata geoIpMeta = currentState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + logger.debug("deleting database configuration [{}]", databaseId); + Map databases = new HashMap<>(geoIpMeta.getDatabases()); + databases.remove(databaseId); + + Metadata currentMeta = currentState.metadata(); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentMeta).putCustom(IngestGeoIpMetadata.TYPE, new IngestGeoIpMetadata(databases))) + .build(); + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + } + } + + @Override + protected ClusterBlockException checkBlock(Request request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..a14a143e3f404 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java @@ -0,0 +1,109 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class TransportGetDatabaseConfigurationAction extends TransportMasterNodeAction< + GetDatabaseConfigurationAction.Request, + GetDatabaseConfigurationAction.Response> { + + @Inject + public TransportGetDatabaseConfigurationAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + GetDatabaseConfigurationAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + GetDatabaseConfigurationAction.Request::new, + indexNameExpressionResolver, + GetDatabaseConfigurationAction.Response::new, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + } + + @Override + protected void masterOperation( + final Task task, + final GetDatabaseConfigurationAction.Request request, + final ClusterState state, + final ActionListener listener + ) { + final Set ids; + if (request.getDatabaseIds().length == 0) { + // if we did not ask for a specific name, then return all databases + ids = Set.of("*"); + } else { + ids = new LinkedHashSet<>(Arrays.asList(request.getDatabaseIds())); + } + + if (ids.size() > 1 && ids.stream().anyMatch(Regex::isSimpleMatchPattern)) { + throw new IllegalArgumentException( + "wildcard only supports a single value, please use comma-separated values or a single wildcard value" + ); + } + + final IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + List results = new ArrayList<>(); + + for (String id : ids) { + if (Regex.isSimpleMatchPattern(id)) { + for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { + if (Regex.simpleMatch(id, entry.getKey())) { + results.add(entry.getValue()); + } + } + } else { + DatabaseConfigurationMetadata meta = geoIpMeta.getDatabases().get(id); + if (meta == null) { + listener.onFailure(new ResourceNotFoundException("database configuration not found: {}", id)); + return; + } else { + results.add(meta); + } + } + } + + listener.onResponse(new GetDatabaseConfigurationAction.Response(results)); + } + + @Override + protected ClusterBlockException checkBlock(GetDatabaseConfigurationAction.Request request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_READ); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationAction.java 
b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..540be68671d38 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationAction.java @@ -0,0 +1,178 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateTaskListener; +import org.elasticsearch.cluster.SimpleBatchedExecutor; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.service.MasterServiceTaskQueue; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Strings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction.Request; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.time.Instant; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +public class TransportPutDatabaseConfigurationAction extends TransportMasterNodeAction { + + private static final Logger logger = LogManager.getLogger(TransportPutDatabaseConfigurationAction.class); + + private static final SimpleBatchedExecutor UPDATE_TASK_EXECUTOR = new SimpleBatchedExecutor<>() { + @Override + public Tuple executeTask(UpdateDatabaseConfigurationTask task, ClusterState clusterState) throws Exception { + return Tuple.tuple(task.execute(clusterState), null); + } + + @Override + public void taskSucceeded(UpdateDatabaseConfigurationTask task, Void unused) { + logger.trace("Updated cluster state for creation-or-update of database configuration [{}]", task.database.id()); + task.listener.onResponse(AcknowledgedResponse.TRUE); + } + }; + + private final MasterServiceTaskQueue updateDatabaseConfigurationTaskQueue; + + @Inject + public TransportPutDatabaseConfigurationAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + PutDatabaseConfigurationAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + Request::new, + indexNameExpressionResolver, + 
AcknowledgedResponse::readFrom, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + this.updateDatabaseConfigurationTaskQueue = clusterService.createTaskQueue( + "update-geoip-database-configuration-state-update", + Priority.NORMAL, + UPDATE_TASK_EXECUTOR + ); + } + + @Override + protected void masterOperation(Task task, Request request, ClusterState state, ActionListener listener) { + final String id = request.getDatabase().id(); + updateDatabaseConfigurationTaskQueue.submitTask( + Strings.format("update-geoip-database-configuration-[%s]", id), + new UpdateDatabaseConfigurationTask(listener, request.getDatabase()), + null + ); + } + + /** + * Returns 'true' if the database configuration is effectually the same, and thus can be a no-op update. + */ + static boolean isNoopUpdate(@Nullable DatabaseConfigurationMetadata existingDatabase, DatabaseConfiguration newDatabase) { + if (existingDatabase == null) { + return false; + } else { + return newDatabase.equals(existingDatabase.database()); + } + } + + static void validatePrerequisites(DatabaseConfiguration database, ClusterState state) { + // we need to verify that the database represents a unique file (name) among the various databases for this same provider + IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + Optional sameName = geoIpMeta.getDatabases() + .values() + .stream() + .map(DatabaseConfigurationMetadata::database) + // .filter(d -> d.type().equals(database.type())) // of the same type (right now the type is always just 'maxmind') + .filter(d -> d.id().equals(database.id()) == false) // and a different id + .filter(d -> d.name().equals(database.name())) // but has the same name! + .findFirst(); + + sameName.ifPresent(d -> { + throw new IllegalArgumentException( + Strings.format("database [%s] is already being downloaded via configuration [%s]", database.name(), d.id()) + ); + }); + } + + private record UpdateDatabaseConfigurationTask(ActionListener listener, DatabaseConfiguration database) + implements + ClusterStateTaskListener { + + ClusterState execute(ClusterState currentState) throws Exception { + IngestGeoIpMetadata geoIpMeta = currentState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + String id = database.id(); + final DatabaseConfigurationMetadata existingDatabase = geoIpMeta.getDatabases().get(id); + // double-check for no-op in the state update task, in case it was changed/reset in the meantime + if (isNoopUpdate(existingDatabase, database)) { + return currentState; + } + + validatePrerequisites(database, currentState); + + Map databases = new HashMap<>(geoIpMeta.getDatabases()); + databases.put( + id, + new DatabaseConfigurationMetadata( + database, + existingDatabase == null ? 
1 : existingDatabase.version() + 1, + Instant.now().toEpochMilli() + ) + ); + geoIpMeta = new IngestGeoIpMetadata(databases); + + if (existingDatabase == null) { + logger.debug("adding new database configuration [{}]", id); + } else { + logger.debug("updating existing database configuration [{}]", id); + } + + Metadata currentMeta = currentState.metadata(); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentMeta).putCustom(IngestGeoIpMetadata.TYPE, geoIpMeta)) + .build(); + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + } + } + + @Override + protected ClusterBlockException checkBlock(Request request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java new file mode 100644 index 0000000000000..58cb566165db2 --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java @@ -0,0 +1,538 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.DocWriteRequest.OpType; +import org.elasticsearch.action.DocWriteResponse; +import org.elasticsearch.action.admin.indices.flush.FlushAction; +import org.elasticsearch.action.admin.indices.flush.FlushRequest; +import org.elasticsearch.action.admin.indices.refresh.RefreshAction; +import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.action.index.TransportIndexAction; +import org.elasticsearch.action.support.broadcast.BroadcastResponse; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlocks; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.hash.MessageDigests; +import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.EnterpriseGeoIpTask; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.node.Node; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.persistent.PersistentTasksService; +import org.elasticsearch.telemetry.metric.MeterRegistry; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.client.NoOpClient; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xcontent.XContentType; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; + +import 
java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.PasswordAuthentication; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; + +import static org.elasticsearch.ingest.geoip.DatabaseNodeServiceTests.createClusterState; +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloader.MAX_CHUNK_SIZE; +import static org.elasticsearch.tasks.TaskId.EMPTY_TASK_ID; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.when; + +public class EnterpriseGeoIpDownloaderTests extends ESTestCase { + + private HttpClient httpClient; + private ClusterService clusterService; + private ThreadPool threadPool; + private MockClient client; + private EnterpriseGeoIpDownloader geoIpDownloader; + + @Before + public void setup() throws IOException { + httpClient = mock(HttpClient.class); + when(httpClient.getBytes(any(), anyString())).thenReturn( + "e4a3411cdd7b21eaf18675da5a7f9f360d33c6882363b2c19c38715834c9e836 GeoIP2-City_20240709.tar.gz".getBytes(StandardCharsets.UTF_8) + ); + clusterService = mock(ClusterService.class); + threadPool = new ThreadPool(Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test").build(), MeterRegistry.NOOP); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, Set.of(GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING)) + ); + ClusterState state = createClusterState(new PersistentTasksCustomMetadata(1L, Map.of())); + when(clusterService.state()).thenReturn(state); + client = new MockClient(threadPool); + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + { + EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams geoIpTaskParams = mock(EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams.class); + when(geoIpTaskParams.getWriteableName()).thenReturn(EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER); + init(new PersistentTasksService(clusterService, threadPool, client), null, null, 0); + } + }; + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testGetChunkEndOfStream() throws IOException { + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(new InputStream() { + @Override + public int read() { + return -1; + } + }); + assertArrayEquals(new byte[0], chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(new ByteArrayInputStream(new byte[0])); + assertArrayEquals(new byte[0], chunk); + } + + public void testGetChunkLessThanChunkSize() throws IOException { + ByteArrayInputStream is = new ByteArrayInputStream(new byte[] { 1, 2, 3, 4 }); + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(new byte[] { 1, 2, 3, 4 }, chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + 
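+        // a second read against the now-exhausted stream is expected to yield an empty chunk rather than null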
assertArrayEquals(new byte[0], chunk); + + } + + public void testGetChunkExactlyChunkSize() throws IOException { + byte[] bigArray = new byte[MAX_CHUNK_SIZE]; + for (int i = 0; i < MAX_CHUNK_SIZE; i++) { + bigArray[i] = (byte) i; + } + ByteArrayInputStream is = new ByteArrayInputStream(bigArray); + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(bigArray, chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(new byte[0], chunk); + } + + public void testGetChunkMoreThanChunkSize() throws IOException { + byte[] bigArray = new byte[MAX_CHUNK_SIZE * 2]; + for (int i = 0; i < MAX_CHUNK_SIZE * 2; i++) { + bigArray[i] = (byte) i; + } + byte[] smallArray = new byte[MAX_CHUNK_SIZE]; + System.arraycopy(bigArray, 0, smallArray, 0, MAX_CHUNK_SIZE); + ByteArrayInputStream is = new ByteArrayInputStream(bigArray); + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(smallArray, chunk); + System.arraycopy(bigArray, MAX_CHUNK_SIZE, smallArray, 0, MAX_CHUNK_SIZE); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(smallArray, chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(new byte[0], chunk); + } + + public void testGetChunkRethrowsIOException() { + expectThrows(IOException.class, () -> EnterpriseGeoIpDownloader.getChunk(new InputStream() { + @Override + public int read() throws IOException { + throw new IOException(); + } + })); + } + + public void testIndexChunksNoData() throws IOException { + client.addHandler(FlushAction.INSTANCE, (FlushRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + }); + client.addHandler( + RefreshAction.INSTANCE, + (RefreshRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + } + ); + + InputStream empty = new ByteArrayInputStream(new byte[0]); + assertEquals( + Tuple.tuple(0, "d41d8cd98f00b204e9800998ecf8427e"), + geoIpDownloader.indexChunks( + "test", + empty, + 0, + MessageDigests.sha256(), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + 0 + ) + ); + } + + public void testIndexChunksMd5Mismatch() { + client.addHandler(FlushAction.INSTANCE, (FlushRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + }); + client.addHandler( + RefreshAction.INSTANCE, + (RefreshRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + } + ); + + IOException exception = expectThrows( + IOException.class, + () -> geoIpDownloader.indexChunks("test", new ByteArrayInputStream(new byte[0]), 0, MessageDigests.sha256(), "123123", 0) + ); + assertEquals( + "checksum mismatch, expected [123123], actual [e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855]", + exception.getMessage() + ); + } + + public void testIndexChunks() throws IOException { + byte[] bigArray = new byte[MAX_CHUNK_SIZE + 20]; + for (int i = 0; i 
< MAX_CHUNK_SIZE + 20; i++) { + bigArray[i] = (byte) i; + } + byte[][] chunksData = new byte[2][]; + chunksData[0] = new byte[MAX_CHUNK_SIZE]; + System.arraycopy(bigArray, 0, chunksData[0], 0, MAX_CHUNK_SIZE); + chunksData[1] = new byte[20]; + System.arraycopy(bigArray, MAX_CHUNK_SIZE, chunksData[1], 0, 20); + + AtomicInteger chunkIndex = new AtomicInteger(); + + client.addHandler(TransportIndexAction.TYPE, (IndexRequest request, ActionListener listener) -> { + int chunk = chunkIndex.getAndIncrement(); + assertEquals(OpType.CREATE, request.opType()); + assertThat(request.id(), Matchers.startsWith("test_" + (chunk + 15) + "_")); + assertEquals(XContentType.SMILE, request.getContentType()); + Map source = request.sourceAsMap(); + assertEquals("test", source.get("name")); + assertArrayEquals(chunksData[chunk], (byte[]) source.get("data")); + assertEquals(chunk + 15, source.get("chunk")); + listener.onResponse(mock(IndexResponse.class)); + }); + client.addHandler(FlushAction.INSTANCE, (FlushRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + }); + client.addHandler( + RefreshAction.INSTANCE, + (RefreshRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + } + ); + + InputStream big = new ByteArrayInputStream(bigArray); + assertEquals( + Tuple.tuple(17, "a67563dfa8f3cba8b8cff61eb989a749"), + geoIpDownloader.indexChunks( + "test", + big, + 15, + MessageDigests.sha256(), + "f2304545f224ff9ffcc585cb0a993723f911e03beb552cc03937dd443e931eab", + 0 + ) + ); + + assertEquals(2, chunkIndex.get()); + } + + public void testProcessDatabaseNew() throws IOException { + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get(any(), any())).thenReturn(bais); + AtomicBoolean indexedChunks = new AtomicBoolean(false); + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + @Override + protected void updateTimestamp(String name, GeoIpTaskState.Metadata metadata) { + fail(); + } + + @Override + Tuple indexChunks( + String name, + InputStream is, + int chunk, + MessageDigest digest, + String expectedMd5, + long start + ) { + assertSame(bais, is); + assertEquals(0, chunk); + indexedChunks.set(true); + return Tuple.tuple(11, expectedMd5); + } + + @Override + void updateTaskState() { + assertEquals(0, state.getDatabases().get("test.mmdb").firstChunk()); + assertEquals(10, state.getDatabases().get("test.mmdb").lastChunk()); + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + assertEquals("test.mmdb", name); + assertEquals(0, firstChunk); + } + }; + + geoIpDownloader.setState(EnterpriseGeoIpTaskState.EMPTY); + PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); + String id = randomIdentifier(); + DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); + geoIpDownloader.processDatabase(auth, 
databaseConfiguration); + assertThat(indexedChunks.get(), equalTo(true)); + } + + public void testProcessDatabaseUpdate() throws IOException { + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get(any(), any())).thenReturn(bais); + AtomicBoolean indexedChunks = new AtomicBoolean(false); + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + @Override + protected void updateTimestamp(String name, GeoIpTaskState.Metadata metadata) { + fail(); + } + + @Override + Tuple indexChunks( + String name, + InputStream is, + int chunk, + MessageDigest digest, + String expectedMd5, + long start + ) { + assertSame(bais, is); + assertEquals(9, chunk); + indexedChunks.set(true); + return Tuple.tuple(1, expectedMd5); + } + + @Override + void updateTaskState() { + assertEquals(9, state.getDatabases().get("test.mmdb").firstChunk()); + assertEquals(10, state.getDatabases().get("test.mmdb").lastChunk()); + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + assertEquals("test.mmdb", name); + assertEquals(9, firstChunk); + } + }; + + geoIpDownloader.setState(EnterpriseGeoIpTaskState.EMPTY.put("test.mmdb", new GeoIpTaskState.Metadata(0, 5, 8, "0", 0))); + PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); + String id = randomIdentifier(); + DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); + geoIpDownloader.processDatabase(auth, databaseConfiguration); + assertThat(indexedChunks.get(), equalTo(true)); + } + + public void testProcessDatabaseSame() throws IOException { + GeoIpTaskState.Metadata metadata = new GeoIpTaskState.Metadata( + 0, + 4, + 10, + "1", + 0, + "e4a3411cdd7b21eaf18675da5a7f9f360d33c6882363b2c19c38715834c9e836" + ); + EnterpriseGeoIpTaskState taskState = EnterpriseGeoIpTaskState.EMPTY.put("test.mmdb", metadata); + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get(any(), any())).thenReturn(bais); + + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + @Override + protected void updateTimestamp(String name, GeoIpTaskState.Metadata newMetadata) { + assertEquals(metadata, newMetadata); + assertEquals("test.mmdb", name); + } + + @Override + Tuple indexChunks( + String name, + InputStream is, + int chunk, + MessageDigest digest, + String expectedChecksum, + long start + ) { + fail(); + return Tuple.tuple(0, expectedChecksum); + } + + @Override + void updateTaskState() { + fail(); + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + fail(); + } + }; + geoIpDownloader.setState(taskState); + PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); + String id = randomIdentifier(); + DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); + geoIpDownloader.processDatabase(auth, databaseConfiguration); 
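+        // every override above except updateTimestamp calls fail(): when the stored sha256 already matches,
+        // processDatabase should only refresh the timestamp and must not re-download or re-index any chunks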
+ } + + public void testUpdateDatabasesWriteBlock() { + ClusterState state = createClusterState(new PersistentTasksCustomMetadata(1L, Map.of())); + var geoIpIndex = state.getMetadata().getIndicesLookup().get(EnterpriseGeoIpDownloader.DATABASES_INDEX).getWriteIndex().getName(); + state = ClusterState.builder(state) + .blocks(new ClusterBlocks.Builder().addIndexBlock(geoIpIndex, IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK)) + .build(); + when(clusterService.state()).thenReturn(state); + var e = expectThrows(ClusterBlockException.class, () -> geoIpDownloader.updateDatabases()); + assertThat( + e.getMessage(), + equalTo( + "index [" + + geoIpIndex + + "] blocked by: [TOO_MANY_REQUESTS/12/disk usage exceeded flood-stage watermark, " + + "index has read-only-allow-delete block];" + ) + ); + verifyNoInteractions(httpClient); + } + + public void testUpdateDatabasesIndexNotReady() throws IOException { + ClusterState state = createClusterState(new PersistentTasksCustomMetadata(1L, Map.of()), true); + var geoIpIndex = state.getMetadata().getIndicesLookup().get(EnterpriseGeoIpDownloader.DATABASES_INDEX).getWriteIndex().getName(); + state = ClusterState.builder(state) + .blocks(new ClusterBlocks.Builder().addIndexBlock(geoIpIndex, IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK)) + .build(); + when(clusterService.state()).thenReturn(state); + geoIpDownloader.updateDatabases(); + verifyNoInteractions(httpClient); + } + + private GeoIpTaskState.Metadata newGeoIpTaskStateMetadata(boolean expired) { + Instant lastChecked; + if (expired) { + lastChecked = Instant.now().minus(randomIntBetween(31, 100), ChronoUnit.DAYS); + } else { + lastChecked = Instant.now().minus(randomIntBetween(0, 29), ChronoUnit.DAYS); + } + return new GeoIpTaskState.Metadata(0, 0, 0, randomAlphaOfLength(20), lastChecked.toEpochMilli()); + } + + private static class MockClient extends NoOpClient { + + private final Map, BiConsumer>> handlers = new HashMap<>(); + + private MockClient(ThreadPool threadPool) { + super(threadPool); + } + + public void addHandler( + ActionType action, + BiConsumer> listener + ) { + handlers.put(action, listener); + } + + @SuppressWarnings("unchecked") + @Override + protected void doExecute( + ActionType action, + Request request, + ActionListener listener + ) { + if (handlers.containsKey(action)) { + BiConsumer> biConsumer = (BiConsumer>) handlers.get( + action + ); + biConsumer.accept(request, listener); + } else { + throw new IllegalStateException("unexpected action called [" + action.name() + "]"); + } + } + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskStateSerializationTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskStateSerializationTests.java new file mode 100644 index 0000000000000..a136f90780989 --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskStateSerializationTests.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class EnterpriseGeoIpTaskStateSerializationTests extends AbstractXContentSerializingTestCase { + @Override + protected GeoIpTaskState doParseInstance(XContentParser parser) throws IOException { + return GeoIpTaskState.fromXContent(parser); + } + + @Override + protected Writeable.Reader instanceReader() { + return GeoIpTaskState::new; + } + + @Override + protected GeoIpTaskState createTestInstance() { + GeoIpTaskState state = GeoIpTaskState.EMPTY; + int databaseCount = randomInt(20); + for (int i = 0; i < databaseCount; i++) { + state = state.put(randomAlphaOfLengthBetween(5, 10), createRandomMetadata()); + } + return state; + } + + @Override + protected GeoIpTaskState mutateInstance(GeoIpTaskState instance) { + Map databases = new HashMap<>(instance.getDatabases()); + switch (between(0, 2)) { + case 0: + String databaseName = randomValueOtherThanMany(databases::containsKey, () -> randomAlphaOfLengthBetween(5, 10)); + databases.put(databaseName, createRandomMetadata()); + return new GeoIpTaskState(databases); + case 1: + if (databases.size() > 0) { + String randomDatabaseName = databases.keySet().iterator().next(); + databases.put(randomDatabaseName, createRandomMetadata()); + } else { + databases.put(randomAlphaOfLengthBetween(5, 10), createRandomMetadata()); + } + return new GeoIpTaskState(databases); + case 2: + if (databases.size() > 0) { + String randomDatabaseName = databases.keySet().iterator().next(); + databases.remove(randomDatabaseName); + } else { + databases.put(randomAlphaOfLengthBetween(5, 10), createRandomMetadata()); + } + return new GeoIpTaskState(databases); + default: + throw new AssertionError("failure, got illegal switch case"); + } + } + + private GeoIpTaskState.Metadata createRandomMetadata() { + return new GeoIpTaskState.Metadata(randomLong(), randomInt(), randomInt(), randomAlphaOfLength(32), randomLong()); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java index 6a83fe69473f7..06b2605bd6d41 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java @@ -426,6 +426,55 @@ void deleteOldChunks(String name, int firstChunk) { assertEquals(0, stats.getFailedDownloads()); } + public void testCleanDatabases() throws IOException { + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get("http://a.b/t1")).thenReturn(bais); + + final AtomicInteger count = new AtomicInteger(0); + + geoIpDownloader = new GeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + Settings.EMPTY, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + () -> GeoIpDownloaderTaskExecutor.EAGER_DOWNLOAD_SETTING.getDefault(Settings.EMPTY), + () -> true + ) { + @Override + void updateDatabases() throws IOException { + // noop + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + count.incrementAndGet(); + assertEquals("test.mmdb", name); + 
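+                // presumably lastChunk (20) + 1, i.e. every chunk of the expired database is expected to be deleted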
assertEquals(21, firstChunk); + } + + @Override + void updateTaskState() { + // noop + } + }; + + geoIpDownloader.setState(GeoIpTaskState.EMPTY.put("test.mmdb", new GeoIpTaskState.Metadata(10, 10, 20, "md5", 20))); + geoIpDownloader.runDownloader(); + geoIpDownloader.runDownloader(); + GeoIpDownloaderStats stats = geoIpDownloader.getStatus(); + assertEquals(1, stats.getExpiredDatabases()); + assertEquals(2, count.get()); // somewhat surprising, not necessarily wrong + assertEquals(18, geoIpDownloader.state.getDatabases().get("test.mmdb").lastCheck()); // highly surprising, seems wrong + } + @SuppressWarnings("unchecked") public void testUpdateTaskState() { geoIpDownloader = new GeoIpDownloader( diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java new file mode 100644 index 0000000000000..eca23cb13cd3d --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata; +import org.elasticsearch.test.AbstractChunkedSerializingTestCase; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class IngestGeoIpMetadataTests extends AbstractChunkedSerializingTestCase { + @Override + protected IngestGeoIpMetadata doParseInstance(XContentParser parser) throws IOException { + return IngestGeoIpMetadata.fromXContent(parser); + } + + @Override + protected Writeable.Reader instanceReader() { + return IngestGeoIpMetadata::new; + } + + @Override + protected IngestGeoIpMetadata createTestInstance() { + return randomIngestGeoIpMetadata(); + } + + @Override + protected IngestGeoIpMetadata mutateInstance(IngestGeoIpMetadata instance) throws IOException { + Map databases = new HashMap<>(instance.getDatabases()); + switch (between(0, 2)) { + case 0 -> { + String databaseId = randomValueOtherThanMany(databases::containsKey, ESTestCase::randomIdentifier); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + return new IngestGeoIpMetadata(databases); + } + case 1 -> { + if (databases.size() > 0) { + String randomDatabaseId = databases.keySet().iterator().next(); + databases.put(randomDatabaseId, randomDatabaseConfigurationMetadata(randomDatabaseId)); + } else { + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + } + return new IngestGeoIpMetadata(databases); + } + case 2 -> { + if (databases.size() > 0) { + String randomDatabaseId = databases.keySet().iterator().next(); + databases.remove(randomDatabaseId); + } else { + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + } + return new 
IngestGeoIpMetadata(databases); + } + default -> throw new AssertionError("failure, got illegal switch case"); + } + } + + private IngestGeoIpMetadata randomIngestGeoIpMetadata() { + Map databases = new HashMap<>(); + for (int i = 0; i < randomIntBetween(0, 20); i++) { + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + } + return new IngestGeoIpMetadata(databases); + } + + private DatabaseConfigurationMetadata randomDatabaseConfigurationMetadata(String id) { + return new DatabaseConfigurationMetadata( + randomDatabaseConfiguration(id), + randomNonNegativeLong(), + randomPositiveTimeValue().millis() + ); + } + + private DatabaseConfiguration randomDatabaseConfiguration(String id) { + return new DatabaseConfiguration(id, randomAlphaOfLength(10), new DatabaseConfiguration.Maxmind(randomAlphaOfLength(10))); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java new file mode 100644 index 0000000000000..f035416d48068 --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; + +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.MAXMIND_NAMES; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationTests.randomDatabaseConfiguration; + +public class DatabaseConfigurationMetadataTests extends AbstractXContentSerializingTestCase { + + private String id; + + @Override + protected DatabaseConfigurationMetadata doParseInstance(XContentParser parser) throws IOException { + return DatabaseConfigurationMetadata.parse(parser, id); + } + + @Override + protected DatabaseConfigurationMetadata createTestInstance() { + id = randomAlphaOfLength(5); + return randomDatabaseConfigurationMetadata(id); + } + + public static DatabaseConfigurationMetadata randomDatabaseConfigurationMetadata(String id) { + return new DatabaseConfigurationMetadata( + new DatabaseConfiguration(id, randomFrom(MAXMIND_NAMES), new DatabaseConfiguration.Maxmind(randomAlphaOfLength(5))), + randomNonNegativeLong(), + randomPositiveTimeValue().millis() + ); + } + + @Override + protected DatabaseConfigurationMetadata mutateInstance(DatabaseConfigurationMetadata instance) { + switch (between(0, 2)) { + case 0: + return new DatabaseConfigurationMetadata( + randomValueOtherThan(instance.database(), () -> randomDatabaseConfiguration(randomAlphaOfLength(5))), + instance.version(), + instance.modifiedDate() + ); + case 1: + return new DatabaseConfigurationMetadata( + instance.database(), + randomValueOtherThan(instance.version(), ESTestCase::randomNonNegativeLong), + instance.modifiedDate() + ); + case 2: + return 
new DatabaseConfigurationMetadata( + instance.database(), + instance.version(), + randomValueOtherThan(instance.modifiedDate(), () -> ESTestCase.randomPositiveTimeValue().millis()) + ); + default: + throw new AssertionError("failure, got illegal switch case"); + } + } + + @Override + protected Writeable.Reader instanceReader() { + return DatabaseConfigurationMetadata::new; + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java new file mode 100644 index 0000000000000..02c067561b49c --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java @@ -0,0 +1,86 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.Maxmind; +import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Set; + +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.MAXMIND_NAMES; + +public class DatabaseConfigurationTests extends AbstractXContentSerializingTestCase { + + private String id; + + @Override + protected DatabaseConfiguration doParseInstance(XContentParser parser) throws IOException { + return DatabaseConfiguration.parse(parser, id); + } + + @Override + protected DatabaseConfiguration createTestInstance() { + id = randomAlphaOfLength(5); + return randomDatabaseConfiguration(id); + } + + public static DatabaseConfiguration randomDatabaseConfiguration(String id) { + return new DatabaseConfiguration(id, randomFrom(MAXMIND_NAMES), new Maxmind(randomAlphaOfLength(5))); + } + + @Override + protected DatabaseConfiguration mutateInstance(DatabaseConfiguration instance) { + switch (between(0, 2)) { + case 0: + return new DatabaseConfiguration(instance.id() + randomAlphaOfLength(2), instance.name(), instance.maxmind()); + case 1: + return new DatabaseConfiguration( + instance.id(), + randomValueOtherThan(instance.name(), () -> randomFrom(MAXMIND_NAMES)), + instance.maxmind() + ); + case 2: + return new DatabaseConfiguration( + instance.id(), + instance.name(), + new Maxmind(instance.maxmind().accountId() + randomAlphaOfLength(2)) + ); + default: + throw new AssertionError("failure, got illegal switch case"); + } + } + + @Override + protected Writeable.Reader instanceReader() { + return DatabaseConfiguration::new; + } + + public void testValidateId() { + Set invalidIds = Set.of("-foo", "_foo", "foo,bar", "foo bar", "foo*bar", "foo.bar"); + for (String id : invalidIds) { + expectThrows(IllegalArgumentException.class, "expected exception for " + id, () -> DatabaseConfiguration.validateId(id)); + } + Set validIds = Set.of("f-oo", "f_oo", "foobar"); + for (String id : validIds) { + DatabaseConfiguration.validateId(id); + } + // Note: the code checks for byte length, but randomAlphoOfLength is only using characters in the ascii subset + String longId = randomAlphaOfLength(128); + 
expectThrows(IllegalArgumentException.class, "expected exception for " + longId, () -> DatabaseConfiguration.validateId(longId)); + String longestAllowedId = randomAlphaOfLength(127); + DatabaseConfiguration.validateId(longestAllowedId); + String shortId = randomAlphaOfLengthBetween(1, 127); + DatabaseConfiguration.validateId(shortId); + expectThrows(IllegalArgumentException.class, "expected exception for empty string", () -> DatabaseConfiguration.validateId("")); + expectThrows(IllegalArgumentException.class, "expected exception for null string", () -> DatabaseConfiguration.validateId(null)); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationActionTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationActionTests.java new file mode 100644 index 0000000000000..710c3ee23916d --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationActionTests.java @@ -0,0 +1,69 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.test.ESTestCase; + +import java.util.HashMap; +import java.util.Map; + +public class TransportPutDatabaseConfigurationActionTests extends ESTestCase { + + public void testValidatePrerequisites() { + // Test that we reject two configurations with the same database name but different ids: + String name = randomAlphaOfLengthBetween(1, 50); + IngestGeoIpMetadata ingestGeoIpMetadata = randomIngestGeoIpMetadata(name); + ClusterState state = ClusterState.builder(ClusterState.EMPTY_STATE) + .metadata(Metadata.builder(Metadata.EMPTY_METADATA).putCustom(IngestGeoIpMetadata.TYPE, ingestGeoIpMetadata)) + .build(); + DatabaseConfiguration databaseConfiguration = randomDatabaseConfiguration(randomIdentifier(), name); + expectThrows( + IllegalArgumentException.class, + () -> TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfiguration, state) + ); + + // Test that we do not reject two configurations with different database names: + String differentName = randomValueOtherThan(name, () -> randomAlphaOfLengthBetween(1, 50)); + DatabaseConfiguration databaseConfigurationForDifferentName = randomDatabaseConfiguration(randomIdentifier(), differentName); + TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfigurationForDifferentName, state); + + // Test that we do not reject a configuration if none already exists: + TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfiguration, ClusterState.EMPTY_STATE); + + // Test that we do not reject a configuration if one with the same database name AND id already exists: + DatabaseConfiguration databaseConfigurationSameNameSameId = ingestGeoIpMetadata.getDatabases() + .values() + .iterator() + .next() + .database(); + TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfigurationSameNameSameId, state); + } + + private 
IngestGeoIpMetadata randomIngestGeoIpMetadata(String name) { + Map databases = new HashMap<>(); + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId, name)); + return new IngestGeoIpMetadata(databases); + } + + private DatabaseConfigurationMetadata randomDatabaseConfigurationMetadata(String id, String name) { + return new DatabaseConfigurationMetadata( + randomDatabaseConfiguration(id, name), + randomNonNegativeLong(), + randomPositiveTimeValue().millis() + ); + } + + private DatabaseConfiguration randomDatabaseConfiguration(String id, String name) { + return new DatabaseConfiguration(id, name, new DatabaseConfiguration.Maxmind(randomAlphaOfLength(10))); + } +} diff --git a/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java b/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java index 58a6e3771b30d..0f0a0c998bd75 100644 --- a/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java +++ b/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java @@ -46,7 +46,12 @@ public class IngestGeoIpClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase .module("reindex") .module("ingest-geoip") .systemProperty("ingest.geoip.downloader.enabled.default", "true") + // sets the plain (geoip.elastic.co) downloader endpoint, which is used in these tests .setting("ingest.geoip.downloader.endpoint", () -> fixture.getAddress(), s -> useFixture) + // also sets the enterprise downloader maxmind endpoint, to make sure we do not accidentally hit the real endpoint from tests + // note: it's not important that the downloading actually work at this point -- the rest tests (so far) don't exercise + // the downloading code because of license reasons -- but if they did, then it would be important that we're hitting a fixture + .systemProperty("ingest.geoip.downloader.maxmind.endpoint.default", () -> fixture.getAddress(), s -> useFixture) .build(); @ClassRule diff --git a/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml b/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml new file mode 100644 index 0000000000000..6809443fdfbc3 --- /dev/null +++ b/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml @@ -0,0 +1,72 @@ +setup: + - requires: + cluster_features: ["geoip.downloader.database.configuration"] + reason: "geoip downloader database configuration APIs added in 8.15" + +--- +"Test adding, getting, and removing geoip databases": + - do: + ingest.put_geoip_database: + id: "my_database_1" + body: > + { + "name": "GeoIP2-City", + "maxmind": { + "account_id": "1234" + } + } + - match: { acknowledged: true } + + - do: + ingest.put_geoip_database: + id: "my_database_1" + body: > + { + "name": "GeoIP2-Country", + "maxmind": { + "account_id": "4321" + } + } + - match: { acknowledged: true } + + - do: + ingest.put_geoip_database: + id: "my_database_2" + body: > + { + "name": "GeoIP2-City", + "maxmind": { + "account_id": "1234" + } + } + - match: { acknowledged: true } + + - do: + ingest.get_geoip_database: + id: "my_database_1" + - length: { databases: 1 } + - match: { databases.0.id: "my_database_1" } + - gte: { databases.0.modified_date_millis: 0 } + - match: { 
databases.0.database.name: "GeoIP2-Country" } + - match: { databases.0.database.maxmind.account_id: "4321" } + + - do: + ingest.get_geoip_database: {} + - length: { databases: 2 } + + - do: + ingest.get_geoip_database: + id: "my_database_1,my_database_2" + - length: { databases: 2 } + + - do: + ingest.delete_geoip_database: + id: "my_database_1" + + - do: + ingest.get_geoip_database: {} + - length: { databases: 1 } + - match: { databases.0.id: "my_database_2" } + - gte: { databases.0.modified_date_millis: 0 } + - match: { databases.0.database.name: "GeoIP2-City" } + - match: { databases.0.database.maxmind.account_id: "1234" } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.delete_geoip_database.json b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.delete_geoip_database.json new file mode 100644 index 0000000000000..ef6dc94dd27a6 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.delete_geoip_database.json @@ -0,0 +1,31 @@ +{ + "ingest.delete_geoip_database":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/TODO.html", + "description":"Deletes a geoip database configuration" + }, + "stability":"stable", + "visibility":"public", + "headers":{ + "accept": [ "application/json"] + }, + "url":{ + "paths":[ + { + "path":"/_ingest/geoip/database/{id}", + "methods":[ + "DELETE" + ], + "parts":{ + "id":{ + "type":"list", + "description":"A comma-separated list of geoip database configurations to delete" + } + } + } + ] + }, + "params":{ + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.get_geoip_database.json b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.get_geoip_database.json new file mode 100644 index 0000000000000..96f028e2e5251 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.get_geoip_database.json @@ -0,0 +1,37 @@ +{ + "ingest.get_geoip_database":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/TODO.html", + "description":"Returns geoip database configuration." 
+ }, + "stability":"stable", + "visibility":"public", + "headers":{ + "accept": [ "application/json"] + }, + "url":{ + "paths":[ + { + "path":"/_ingest/geoip/database", + "methods":[ + "GET" + ] + }, + { + "path":"/_ingest/geoip/database/{id}", + "methods":[ + "GET" + ], + "parts":{ + "id":{ + "type":"list", + "description":"A comma-separated list of geoip database configurations to get; use `*` to get all geoip database configurations" + } + } + } + ] + }, + "params":{ + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.put_geoip_database.json b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.put_geoip_database.json new file mode 100644 index 0000000000000..07f9e37740279 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.put_geoip_database.json @@ -0,0 +1,35 @@ +{ + "ingest.put_geoip_database":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/TODO.html", + "description":"Puts the configuration for a geoip database to be downloaded" + }, + "stability":"stable", + "visibility":"public", + "headers":{ + "accept": [ "application/json"] + }, + "url":{ + "paths":[ + { + "path":"/_ingest/geoip/database/{id}", + "methods":[ + "PUT" + ], + "parts":{ + "id":{ + "type":"string", + "description":"The id of the database configuration" + } + } + } + ] + }, + "params":{ + }, + "body":{ + "description":"The database configuration definition", + "required":true + } + } +} diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 8288b449ec983..2c3f3c20abeb4 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -429,6 +429,7 @@ org.elasticsearch.indices.IndicesFeatures, org.elasticsearch.action.admin.cluster.allocation.AllocationStatsFeatures, org.elasticsearch.index.mapper.MapperFeatures, + org.elasticsearch.ingest.IngestGeoIpFeatures, org.elasticsearch.search.SearchFeatures, org.elasticsearch.script.ScriptFeatures, org.elasticsearch.search.retriever.RetrieversFeatures, @@ -462,4 +463,5 @@ org.elasticsearch.serverless.shardhealth, org.elasticsearch.serverless.apifiltering; exports org.elasticsearch.lucene.spatial; + } diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 183094a1048d8..3e9234db6a87c 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -214,6 +214,7 @@ static TransportVersion def(int id) { public static final TransportVersion INDEX_REQUEST_UPDATE_BY_SCRIPT_ORIGIN = def(8_705_00_0); public static final TransportVersion ML_INFERENCE_COHERE_UNUSED_RERANK_SETTINGS_REMOVED = def(8_706_00_0); public static final TransportVersion ENRICH_CACHE_STATS_SIZE_ADDED = def(8_707_00_0); + public static final TransportVersion ENTERPRISE_GEOIP_DOWNLOADER = def(8_708_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/ingest/EnterpriseGeoIpTask.java b/server/src/main/java/org/elasticsearch/ingest/EnterpriseGeoIpTask.java new file mode 100644 index 0000000000000..a204060ff0c7e --- /dev/null +++ b/server/src/main/java/org/elasticsearch/ingest/EnterpriseGeoIpTask.java @@ -0,0 +1,86 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.persistent.PersistentTaskParams; +import org.elasticsearch.xcontent.ObjectParser; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; + +/** + * As a relatively minor hack, this class holds the string constant that defines both the id + * and the name of the task for the new ip geolocation database downloader feature. It also provides the + * PersistentTaskParams that are necessary to start the task and to run it. + *
<p>
      + * Defining this in Elasticsearch itself gives us a reasonably tidy version of things where we don't + * end up with strange inter-module dependencies. It's not ideal, but it works fine. + */ +public final class EnterpriseGeoIpTask { + + private EnterpriseGeoIpTask() { + // utility class + } + + public static final String ENTERPRISE_GEOIP_DOWNLOADER = "enterprise-geoip-downloader"; + public static final NodeFeature GEOIP_DOWNLOADER_DATABASE_CONFIGURATION = new NodeFeature("geoip.downloader.database.configuration"); + + public static class EnterpriseGeoIpTaskParams implements PersistentTaskParams { + + public static final ObjectParser PARSER = new ObjectParser<>( + ENTERPRISE_GEOIP_DOWNLOADER, + true, + EnterpriseGeoIpTaskParams::new + ); + + public EnterpriseGeoIpTaskParams() {} + + public EnterpriseGeoIpTaskParams(StreamInput in) {} + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.endObject(); + return builder; + } + + @Override + public String getWriteableName() { + return ENTERPRISE_GEOIP_DOWNLOADER; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER; + } + + @Override + public void writeTo(StreamOutput out) {} + + public static EnterpriseGeoIpTaskParams fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof EnterpriseGeoIpTaskParams; + } + } +} diff --git a/server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java b/server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java new file mode 100644 index 0000000000000..0d989ad9f7ab2 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest; + +import org.elasticsearch.features.FeatureSpecification; +import org.elasticsearch.features.NodeFeature; + +import java.util.Set; + +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.GEOIP_DOWNLOADER_DATABASE_CONFIGURATION; + +public class IngestGeoIpFeatures implements FeatureSpecification { + public Set getFeatures() { + return Set.of(GEOIP_DOWNLOADER_DATABASE_CONFIGURATION); + } +} diff --git a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification index a9d9c6a5a1938..054eb7964e9ef 100644 --- a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification +++ b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification @@ -14,6 +14,7 @@ org.elasticsearch.rest.RestFeatures org.elasticsearch.indices.IndicesFeatures org.elasticsearch.action.admin.cluster.allocation.AllocationStatsFeatures org.elasticsearch.index.mapper.MapperFeatures +org.elasticsearch.ingest.IngestGeoIpFeatures org.elasticsearch.search.SearchFeatures org.elasticsearch.search.retriever.RetrieversFeatures org.elasticsearch.script.ScriptFeatures diff --git a/server/src/test/java/org/elasticsearch/ingest/IngestServiceTests.java b/server/src/test/java/org/elasticsearch/ingest/IngestServiceTests.java index 4cb98c8d3c06b..5621ed468f557 100644 --- a/server/src/test/java/org/elasticsearch/ingest/IngestServiceTests.java +++ b/server/src/test/java/org/elasticsearch/ingest/IngestServiceTests.java @@ -1826,9 +1826,9 @@ public void testBulkRequestExecution() throws Exception { for (int i = 0; i < numRequest; i++) { IndexRequest indexRequest = new IndexRequest("_index").id("_id").setPipeline(pipelineId).setFinalPipeline("_none"); indexRequest.source(xContentType, "field1", "value1"); - boolean shouldListExecutedPiplines = randomBoolean(); - executedPipelinesExpected.add(shouldListExecutedPiplines); - indexRequest.setListExecutedPipelines(shouldListExecutedPiplines); + boolean shouldListExecutedPipelines = randomBoolean(); + executedPipelinesExpected.add(shouldListExecutedPipelines); + indexRequest.setListExecutedPipelines(shouldListExecutedPipelines); bulkRequest.add(indexRequest); } diff --git a/test/fixtures/geoip-fixture/src/main/java/fixture/geoip/EnterpriseGeoIpHttpFixture.java b/test/fixtures/geoip-fixture/src/main/java/fixture/geoip/EnterpriseGeoIpHttpFixture.java new file mode 100644 index 0000000000000..9a5205f66d1f4 --- /dev/null +++ b/test/fixtures/geoip-fixture/src/main/java/fixture/geoip/EnterpriseGeoIpHttpFixture.java @@ -0,0 +1,125 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package fixture.geoip; + +import com.sun.net.httpserver.HttpServer; + +import org.elasticsearch.common.hash.MessageDigests; +import org.junit.rules.ExternalResource; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UncheckedIOException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.security.MessageDigest; + +/** + * This fixture is used to simulate a maxmind-provided server for downloading maxmind geoip database files from the + * EnterpriseGeoIpDownloader. It can be used by integration tests so that they don't actually hit maxmind servers. + */ +public class EnterpriseGeoIpHttpFixture extends ExternalResource { + + private final Path source; + private final boolean enabled; + private final String[] databaseTypes; + private HttpServer server; + + /* + * The values in databaseTypes must be in DatabaseConfiguration.MAXMIND_NAMES, and must be one of the databases copied in the + * copyFiles method of thisi class. + */ + public EnterpriseGeoIpHttpFixture(boolean enabled, String... databaseTypes) { + this.enabled = enabled; + this.databaseTypes = databaseTypes; + try { + this.source = Files.createTempDirectory("source"); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + public String getAddress() { + return "http://" + server.getAddress().getHostString() + ":" + server.getAddress().getPort() + "/"; + } + + @Override + protected void before() throws Throwable { + if (enabled) { + copyFiles(); + this.server = HttpServer.create(new InetSocketAddress(InetAddress.getLoopbackAddress(), 0), 0); + + // for expediency reasons, it is handy to have this test fixture be able to serve the dual purpose of actually stubbing + // out the download protocol for downloading files from maxmind (see the looped context creation after this stanza), as + // we as to serve an empty response for the geoip.elastic.co service here + this.server.createContext("/", exchange -> { + String response = "[]"; // an empty json array + exchange.sendResponseHeaders(200, response.length()); + try (OutputStream os = exchange.getResponseBody()) { + os.write(response.getBytes(StandardCharsets.UTF_8)); + } + }); + + // register the file types for the download fixture + for (String databaseType : databaseTypes) { + createContextForEnterpriseDatabase(databaseType); + } + + server.start(); + } + } + + private void createContextForEnterpriseDatabase(String databaseType) { + this.server.createContext("/" + databaseType + "/download", exchange -> { + exchange.sendResponseHeaders(200, 0); + if (exchange.getRequestURI().toString().contains("sha256")) { + MessageDigest sha256 = MessageDigests.sha256(); + try (InputStream inputStream = GeoIpHttpFixture.class.getResourceAsStream("/geoip-fixture/" + databaseType + ".tgz")) { + sha256.update(inputStream.readAllBytes()); + } + exchange.getResponseBody() + .write( + (MessageDigests.toHexString(sha256.digest()) + " " + databaseType + "_20240709.tar.gz").getBytes( + StandardCharsets.UTF_8 + ) + ); + } else { + try ( + OutputStream outputStream = exchange.getResponseBody(); + InputStream inputStream = GeoIpHttpFixture.class.getResourceAsStream("/geoip-fixture/" + databaseType + ".tgz") + ) { + inputStream.transferTo(outputStream); + } + } + exchange.getResponseBody().close(); + }); + } + + @Override + protected void after() { + if (enabled) { 
+ server.stop(0); + } + } + + private void copyFiles() throws Exception { + for (String databaseType : databaseTypes) { + Files.copy( + GeoIpHttpFixture.class.getResourceAsStream("/geoip-fixture/GeoIP2-City.tgz"), + source.resolve(databaseType + ".tgz"), + StandardCopyOption.REPLACE_EXISTING + ); + } + } +} diff --git a/test/fixtures/geoip-fixture/src/main/resources/geoip-fixture/GeoIP2-City.tgz b/test/fixtures/geoip-fixture/src/main/resources/geoip-fixture/GeoIP2-City.tgz new file mode 100644 index 0000000000000000000000000000000000000000..76dd40000f1324a6d5857bb163f70aabf6bb9ccd GIT binary patch literal 6377 zcmYj#2Q*yW7w;elf`lN1NEjqhql+?HqD3Z%kLV#e)j@2s=$KKtz7-h1vjYdFKMLR8@2Y#?OYDefL7ZKLf2H@nJ^ zJvoV1~{kg?q*UduV)^=P-zr9+}Zi2-3{aHN?6-hi1C(Pd5%8pPa2hUKi+D}YjE^-c653l zp@23pHJwSFC=XwmmZ_5&leqi!Pik$B_XpFK)K~brzM>TCztS(H5}wVhjilqvPLcgR zZ}ljG>ycxer@^PvQ=v=%h8@=$cVd2Ezwc^>PQZsJJWpN~Njn^_6=-6;({<&7Oh$~m zwA?PwAx|o8_HX~)w4ajO8>)^gs9zJe%>lJyNf>h1F3*n6mAg_!rK@q^djzTxK#Y$5 z>2o=2@;;iLpnd0Feug@l4nG6ta15%}j3Ln1{9J7?lIW*-*X~ocLzq&t(`^NmzhR{< zpLV1Kgp7kn(*x9BG*Eg%1x_*8(*UnOB^@ zYr*JUN)W z$WD0o1m~x)3s`p_?m=i#Oa$~Xb@Uv0gitnD|h`k;eAzKn;`TG*?@Va9uZ1O(##K_2iW9NDk5cy+*ReXo{^F4D8Omcfb|k z(Rc2nB6Y*Gxh8sDw_U%}mG!O6x5J- z!+7y!pD~M5%t^5l9^tf>_{d`?R~7ay`Ze|0g-t}g|N7cj^BXhGHB2%nC0tM5WTkjc zNztDt)(0_l;loa9>*78RClYG2T5fzxQeVy)HEYAxo#I^S+4k9#o5**_cGgnqYuuTT z*{C(HZQ0w0cbyrkCg@9L*6zT1=f4rJ`U_5fza}{lxxXZQT|R=62$8$-enlgz`5BD- z{DJ0YHV+C9W)C`#JK9!^=UfJ@i+hL&&#?9E_5Fkpvk>KC(qfWg#)cc&4I$0cG%Jqt z99izaF~`z4DODo52j>aoZ4?1Siu9 z_7-JIJAvSFCm6_MZM#*pum2w(0aLAq$m-R ze7`LI$|gEd%MV|?$iRT76Dss=6;SP`_z*UL}SG{}dzFUv5;wZE;~semQ$ zMJV>CgTR`9L*U9yc&h%S%-q_bO#dQ_{rpjlz*@94r|G{s&rCX_|3ueZ%?$J9R2Ae$ zhG}V-Fh4PSzDT`XFB)1O+CKTQhOLI;WfR6luj*ME_Yq3NlmfvR1U zk_!vfI=Y5WvySCN@GWP^S@a5t4<0V3>=zmE+L>!V!@uYBdPp!>priYDJg)f=sn$99 z>aIJWJp#dg@PUTqhH~Fak4VN{3qgFpj)Tj=7s;1BZ=8rhE*OQsXLWpE%Rntw>e4z( z?`qX|H3g0yGKW;N?%u=Eg}7@)MtY^RnrAi7^6isoo?f>6t&`SF*Emuy%|++$2%}Kl zEab>CeZCcmK#+4`~zRUH~*s zP_E^=%(tyBVvS=10KQC=Yr%aQx@-mlUv5kQU_d1k`xE8L7qbO)*+Ktj9*lQ=O5kgP zJQL0?Ex22tp}cC1K`{_ICQR~jJWgUja~_Q`Sgr6SL_yC`u0pQ;TvPzO8Hn8jwxUZ6 zaXZ*M(BrRM$D~KRweXv#Pv*6tnW(ZynhvBHsItJzC3lSa%Bg_Jx#(2-3NjjYq~#s} zJ%>#&G0o88UhuF>gM=2WyQDRYyZ?o>PC`l=I}$;X#*jPX-EHkq*BERR$vZzqDPBL@=BGn@3);_{wt* zqjFa(BX%)rC!O;#t3hS|+PAzrGB*aYOb=ttJBo@k27s6klQ+VziuWhUC-M&}ENJ53SGc=a=# zo2RemDPmwDT+Ua#HNoOJhYmUMo*v>UHd%Bgg?%fb&QI8IaAg3BgU`}UlPChqX;|m1 z<{i=<3XkpCFo8W_*=th*{FOASgR~UX4vxQgUbg^?CL2pn78&I!{$u^oyvsa2lpBC6 zP@PAE)#xIcuf#&p6zMK+wK((cZ*8ol-HR2zxt;XpLN6r-giGFFF&UhaJijeF=u{S8 zHrU)(t~t_qpvzqyY@#{tE`q_SK-aM^y$?Ny37HJe1&8H>x~RvYNe5x%09*;U$zSs0 zZg#43o?DmROKU}${72ts)HW%8lw#fj`EP*@t`0IrpC4H*>zYZb)C7Y;x;8>?)!o1| zxWvbXicUa}97+4^9yqPkj^lY6iifiXRK2@pV2zR-=b+{@A?MH=gh_Dt0J-x(e)IJ$ zQbpjU#0#hDzJw>2pbi3u5@-h!ZGrsXc>W8@kC@+lv3BqGqoHZ#ND1Quz}MwK^GaQrKj7aVs`9f^EJ&6eTWanVDG8&)q#}d_`1PgT(xm<0ST;(8sws5|Ahlzvn;^SKA3hfkrn8ioUl3M3QB>k!>*;J4H&Tfy zQhB~^z7SPB4h$nD-sb5H!rpU3$_q14VC?enzc%f!F^0g%1;)v88e~`OnRv6>sF1rd z-ld?fU|HRv964F+GvzbIvGpp{QrLO4SYsxnVh-X?;!bu9IVJ^fD(`N=7``1q_GERN z)Z!_>R|NY6YsmXmsA<_vtK+4)0Eirk9M$;^59$-fJ@LJ3djfmx$!=82AQ`xNDUdtM zZH?c3uPc0S+j^d9ULW#Wl1LIT3z@w#OFBz2OEUXDQN-6)S%PrPqPxqzO*XGG&prQI zPK5H_7)+jdz)SP5vrr6AzY>H2D{fC4M*?0+GDx2sbF~t3rFi7&GWW4pGN==;J|mTM zDN(b#J$Y@MJ@Fcn1WAP?N6Py+`qQ`h3%F4CeLNZvE0xZuBKwI_iibKJV(^ z0GLvG2)S@BaI2GBCDiFzK|n5C2^x?|9igd@OA1%; z=UqJX4yPBuO|XCD{-5WQ<>zQEf?WFw=3}M(I=ayZ7uX4&ZA-*=R+e+k02s5^gsslV z9+(nmu6|E~y%6J0Y9j?UU)5X-EZ`;MIaT&vFk#(2qXH`2)L`?R+ufh6IVOd~Hj zH*trb!Nbo6LTchiK%bUFdjL*F-QWK^C&?XCVFs4yUn0Nu0N&D-6!A!Lu!%q$)}fPg 
z`1dRgX%G8p)ZaCqwLkoq$^B0VE4DWplnMH(Pl;&nH1Ts7EdXyKuWvB>4f_2I*@5BD zu$17CmiFIuB%lHhT-ktIg11oX!6prJ!HEP>s{4v$;AdMTsM`?eZr7VIjMJr31bEY~ zGfDb(|Ip0Ts1be8vWBugIWI=)S%_J@vk?8}sKb_CCP=dRvhetmO@6{a;Ktkgy@&*a zTA2*W_TyvN$Qoj7V=5tJfHx?~TbSeLzN=E=&JJF0!lYeHzV^m&w3dQn>#zJ^g(YIL zA>#Ee&W!O`jNd;1y)te*^$gkNiPG;LSh4?9-(7H?V|EB*`@3=ui+&`ndPL`ge2|Cuu}7TQQ2QK zo9!qIt9@n|x`&s-vq)-%K1d~}w66+~U^E`tK=`F@9K0)N?zPD0ku@6_edZ=8(%?IH zJLhbsJOs~gPbb!5*-no9_$P6EakXR)z9wlKEqFl4H^Kbfr|?JiYq4nY)8%el4f0Dx zSIE#mjtI`ot*#n35<(32P)rxTR9LY_#yj3AQE8Iq__=)Vs)y*uij;!sXZxXsE!$4r2>%Ln-=sZ#{Txxgs?9Nx51FJjx76^Py=bxGtJ;wXUD+RVDEuskheiy4SZPEp6d}snIBuCcQ$E!b!Ba7&9AOGQ zi8qSg7J2G0CUDRn*7_QykLZpyI9KD_TU6%{c~d)X^DL0Rnu8wNL?>KpW;c+Q!`5}R zI?TvaI4oT*=qW1e$kIJ(|H1TPtQ8cnQ63(8O2*)0v3 z5Mtc4?LN^kqv@g;9)H7~d;At?uUe^H#>{+6votSRSX_A_Xe?EYjHs0LO52ev&E9Vd zTXk6(%}Ed>DIOt#wG?Xn`YjAg6%+s9o@nMIwtN3I0Q*&w>KGJc&$u~JlI_Gn`C;E3 zf8_mvDgLX!V`ouHP54=qY307>ufEBl_HSlU2Q_3LXjpP*1S0A}SkJzMR0^$c*tnYy`HvxTmn&rnKeAUF(R>=MT^aSZyzqSFWEW7 z)-~$~F+tOo%!QhqF%}IuKpNk+p5W56t&CHpgc&Y*{pkNil zbK+MiIg8NRQhK#Q#$mAmHxDnTCknr&TaO%e|2mqEj7~Z{W^u<=4LCSDKI)q(Vv4+D zWhL9N+PlpV-B8z)tzg|Db!Tkc@sB-XE@Jw+xP8T^LgRN< zn1M~<*hN>Px24AZT^RIpa%4J;-&OC`a(w^5;SbF{1*t+gE*Wq0vNvP4?(Xk*|J3AF z%bN`keN^nTs;0Jadu5&H=r)S%pn>&oc2teGvzn3h1@e9cgbLcdEr*OPgNtmR;h+=-%n%9mC(1-8+6$SCY{SA3wnbJcu`}3guNWaTrT1k#u9~BLs0sor(Nu zg(pj*$-@ID$GQr_5w)Kir`+EoTUG~pPLMjK7``TJE6Rwmoz_gxMQXhXrGcAg(&>G; z#h8XATE_8;1*3uN=L$ou%*=E$*?eZpdP<)9QGJtYHhXo`Y`03%v@L73E%~PwBv~U( z3@ls!7QcC7I$!uo&nTmfAve|DN-UuK#NnT}&IQb$yyWNtTVez!sF?=PbBF`>`%Y<# zdGIq9pN)jhNPR)wdBlwPk)xwT)~u(d1p1Pv?RBnM05KgmEP$P+qC6pr+<~xD{vMM zrQHUlBpqDDb6O|pWWu5c0$S3U@{62O<{dl6%D*=_Sc>-U2bJsSuO%Al$&Rh{q)ygi zD)Vz1oxnd|4Q9`tE;u}@oQ_VQ+ggg%w|OJeFV_=XuhP=%>Mlw%%+g7#=kFHxiP+re z8Ko(79Ng0C6W?Rb`g8pmb7NAuoRsL6d&Q|GrSVV@+Z{}x1}vrXO3QQI%f^ zpIt=Ysu44$1=A#VV|BRpy|uAm?;cw>Gy&l+K_s6-uBRbAFRJSwPo@~xEf%7essSSY z7+4({n3z99m49%^n>)EcqdFX1#%kAVeeW8dhoV{g1G+r95Yj(jCU)8dvKCA+?%l~T zwRS7hCdxrGV*%*ZEVi>(knQ}e$)9XjPdZx(&IHeL(LDo~2U}5e%dRaG@`!$uN_+k7 zsOnF77IFx}M$(0MVp&^1ZlbBM@aI-#_`_r+Mhs)1f9S5!;0o=23Yl6$2RX(ODaa|1 zz)Dy9j=A8&4~x@5UZMA$ODzGxuceHY;w5&jd?w9F;`q&8H@x#=iEdQppGE~fewz=D zrlq-C--3$_+QmZNdSJxAJir*d%ffSp_G#Vh*3K}TYEBCLIg#YWEmpgp1cVgp3)wp& zH_kn)fu?By|IWs_20-*Vj@JMHM={;N>@d%QTlRRYP=IKoMzD+|xi{B+Pg!Z%>#KSi zEPb+%(JL3JG_+?18zv=I0muKABy^EeeU`E=lc^$;S>lOh-3;CL_2=v-kXb&g-S26+ VZT`Op;4}kha-@ERLR2A;{{wE_+r createComponents(PluginServices services) { + enterpriseGeoIpDownloaderLicenseListener = new EnterpriseGeoIpDownloaderLicenseListener( + services.client(), + services.clusterService(), + services.threadPool(), + getLicenseState() + ); + enterpriseGeoIpDownloaderLicenseListener.init(); + return List.of(enterpriseGeoIpDownloaderLicenseListener); + } +} diff --git a/x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListener.java b/x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListener.java new file mode 100644 index 0000000000000..d6e6f57f10976 --- /dev/null +++ b/x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListener.java @@ -0,0 +1,145 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.geoip; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ResourceAlreadyExistsException; +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateListener; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.ingest.EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams; +import org.elasticsearch.license.License; +import org.elasticsearch.license.LicenseStateListener; +import org.elasticsearch.license.LicensedFeature; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.persistent.PersistentTasksService; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.RemoteTransportException; +import org.elasticsearch.xpack.core.XPackField; + +import java.util.Objects; + +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; + +public class EnterpriseGeoIpDownloaderLicenseListener implements LicenseStateListener, ClusterStateListener { + private static final Logger logger = LogManager.getLogger(EnterpriseGeoIpDownloaderLicenseListener.class); + // Note: This custom type is GeoIpMetadata.TYPE, but that class is not exposed to this plugin + static final String INGEST_GEOIP_CUSTOM_METADATA_TYPE = "ingest_geoip"; + + private final PersistentTasksService persistentTasksService; + private final ClusterService clusterService; + private final XPackLicenseState licenseState; + private static final LicensedFeature.Momentary ENTERPRISE_GEOIP_FEATURE = LicensedFeature.momentary( + null, + XPackField.ENTERPRISE_GEOIP_DOWNLOADER, + License.OperationMode.PLATINUM + ); + private volatile boolean licenseIsValid = false; + private volatile boolean hasIngestGeoIpMetadata = false; + + protected EnterpriseGeoIpDownloaderLicenseListener( + Client client, + ClusterService clusterService, + ThreadPool threadPool, + XPackLicenseState licenseState + ) { + this.persistentTasksService = new PersistentTasksService(clusterService, threadPool, client); + this.clusterService = clusterService; + this.licenseState = licenseState; + } + + @UpdateForV9 // use MINUS_ONE once that means no timeout + private static final TimeValue MASTER_TIMEOUT = TimeValue.MAX_VALUE; + private volatile boolean licenseStateListenerRegistered; + + public void init() { + listenForLicenseStateChanges(); + clusterService.addListener(this); + } + + void listenForLicenseStateChanges() { + assert licenseStateListenerRegistered == false : "listenForLicenseStateChanges() should only be called once"; + licenseStateListenerRegistered = true; + licenseState.addListener(this); + } + + @Override + public void licenseStateChanged() { + licenseIsValid = ENTERPRISE_GEOIP_FEATURE.checkWithoutTracking(licenseState); + maybeUpdateTaskState(clusterService.state()); + } + + @Override + public void clusterChanged(ClusterChangedEvent event) { + hasIngestGeoIpMetadata = event.state().metadata().custom(INGEST_GEOIP_CUSTOM_METADATA_TYPE) != null; + final boolean ingestGeoIpCustomMetaChangedInEvent = event.metadataChanged() + && 
event.changedCustomMetadataSet().contains(INGEST_GEOIP_CUSTOM_METADATA_TYPE); + final boolean masterNodeChanged = Objects.equals( + event.state().nodes().getMasterNode(), + event.previousState().nodes().getMasterNode() + ) == false; + /* + * We don't want to potentially start the task on every cluster state change, so only maybeUpdateTaskState if this cluster change + * event involved the modification of custom geoip metadata OR a master node change + */ + if (ingestGeoIpCustomMetaChangedInEvent || (masterNodeChanged && hasIngestGeoIpMetadata)) { + maybeUpdateTaskState(event.state()); + } + } + + private void maybeUpdateTaskState(ClusterState state) { + // We should only start/stop task from single node, master is the best as it will go through it anyway + if (state.nodes().isLocalNodeElectedMaster()) { + if (licenseIsValid) { + if (hasIngestGeoIpMetadata) { + ensureTaskStarted(); + } + } else { + ensureTaskStopped(); + } + } + } + + private void ensureTaskStarted() { + assert licenseIsValid : "Task should never be started without valid license"; + persistentTasksService.sendStartRequest( + ENTERPRISE_GEOIP_DOWNLOADER, + ENTERPRISE_GEOIP_DOWNLOADER, + new EnterpriseGeoIpTaskParams(), + MASTER_TIMEOUT, + ActionListener.wrap(r -> logger.debug("Started enterprise geoip downloader task"), e -> { + Throwable t = e instanceof RemoteTransportException ? ExceptionsHelper.unwrapCause(e) : e; + if (t instanceof ResourceAlreadyExistsException == false) { + logger.error("failed to create enterprise geoip downloader task", e); + } + }) + ); + } + + private void ensureTaskStopped() { + ActionListener> listener = ActionListener.wrap( + r -> logger.debug("Stopped enterprise geoip downloader task"), + e -> { + Throwable t = e instanceof RemoteTransportException ? ExceptionsHelper.unwrapCause(e) : e; + if (t instanceof ResourceNotFoundException == false) { + logger.error("failed to remove enterprise geoip downloader task", e); + } + } + ); + persistentTasksService.sendRemoveRequest(ENTERPRISE_GEOIP_DOWNLOADER, MASTER_TIMEOUT, listener); + } +} diff --git a/x-pack/plugin/geoip-enterprise-downloader/src/test/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListenerTests.java b/x-pack/plugin/geoip-enterprise-downloader/src/test/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListenerTests.java new file mode 100644 index 0000000000000..5a5aacd392f3c --- /dev/null +++ b/x-pack/plugin/geoip-enterprise-downloader/src/test/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListenerTests.java @@ -0,0 +1,219 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.geoip; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.node.DiscoveryNodeUtils; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.license.License; +import org.elasticsearch.license.TestUtils; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.license.internal.XPackLicenseStatus; +import org.elasticsearch.node.Node; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.persistent.RemovePersistentTaskAction; +import org.elasticsearch.persistent.StartPersistentTaskAction; +import org.elasticsearch.telemetry.metric.MeterRegistry; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.client.NoOpClient; +import org.elasticsearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; + +import java.util.Map; +import java.util.UUID; + +import static org.elasticsearch.xpack.geoip.EnterpriseGeoIpDownloaderLicenseListener.INGEST_GEOIP_CUSTOM_METADATA_TYPE; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class EnterpriseGeoIpDownloaderLicenseListenerTests extends ESTestCase { + + private ThreadPool threadPool; + + @Before + public void setup() { + threadPool = new ThreadPool(Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test").build(), MeterRegistry.NOOP); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testAllConditionsMetOnStart() { + // Should never start if not master node, even if all other conditions have been met + final XPackLicenseState licenseState = getAlwaysValidLicense(); + ClusterService clusterService = createClusterService(true, false); + TaskStartAndRemoveMockClient client = new TaskStartAndRemoveMockClient(threadPool, true, false); + EnterpriseGeoIpDownloaderLicenseListener listener = new EnterpriseGeoIpDownloaderLicenseListener( + client, + clusterService, + threadPool, + licenseState + ); + listener.init(); + listener.licenseStateChanged(); + listener.clusterChanged(new ClusterChangedEvent("test", createClusterState(true, true), clusterService.state())); + client.assertTaskStartHasBeenCalled(); + } + + public void testLicenseChanges() { + final TestUtils.UpdatableLicenseState licenseState = new TestUtils.UpdatableLicenseState(); + licenseState.update(new XPackLicenseStatus(License.OperationMode.TRIAL, false, "")); + ClusterService clusterService = createClusterService(true, true); + TaskStartAndRemoveMockClient client = new TaskStartAndRemoveMockClient(threadPool, false, true); + EnterpriseGeoIpDownloaderLicenseListener listener = new EnterpriseGeoIpDownloaderLicenseListener( + client, + clusterService, + threadPool, + licenseState + ); + listener.init(); + listener.licenseStateChanged(); + listener.clusterChanged(new ClusterChangedEvent("test", 
clusterService.state(), clusterService.state())); + client.expectStartTask = true; + client.expectRemoveTask = false; + licenseState.update(new XPackLicenseStatus(License.OperationMode.TRIAL, true, "")); + listener.licenseStateChanged(); + client.assertTaskStartHasBeenCalled(); + client.expectStartTask = false; + client.expectRemoveTask = true; + licenseState.update(new XPackLicenseStatus(License.OperationMode.TRIAL, false, "")); + listener.licenseStateChanged(); + client.assertTaskRemoveHasBeenCalled(); + } + + public void testDatabaseChanges() { + final XPackLicenseState licenseState = getAlwaysValidLicense(); + ClusterService clusterService = createClusterService(true, false); + TaskStartAndRemoveMockClient client = new TaskStartAndRemoveMockClient(threadPool, false, false); + EnterpriseGeoIpDownloaderLicenseListener listener = new EnterpriseGeoIpDownloaderLicenseListener( + client, + clusterService, + threadPool, + licenseState + ); + listener.init(); + listener.licenseStateChanged(); + listener.clusterChanged(new ClusterChangedEvent("test", clusterService.state(), clusterService.state())); + // add a geoip database, so the task ought to be started: + client.expectStartTask = true; + listener.clusterChanged(new ClusterChangedEvent("test", createClusterState(true, true), clusterService.state())); + client.assertTaskStartHasBeenCalled(); + // Now we remove the geoip databases. The task ought to just be left alone. + client.expectStartTask = false; + client.expectRemoveTask = false; + listener.clusterChanged(new ClusterChangedEvent("test", createClusterState(true, false), clusterService.state())); + } + + public void testMasterChanges() { + // Should never start if not master node, even if all other conditions have been met + final XPackLicenseState licenseState = getAlwaysValidLicense(); + ClusterService clusterService = createClusterService(false, false); + TaskStartAndRemoveMockClient client = new TaskStartAndRemoveMockClient(threadPool, false, false); + EnterpriseGeoIpDownloaderLicenseListener listener = new EnterpriseGeoIpDownloaderLicenseListener( + client, + clusterService, + threadPool, + licenseState + ); + listener.init(); + listener.licenseStateChanged(); + listener.clusterChanged(new ClusterChangedEvent("test", createClusterState(false, true), clusterService.state())); + client.expectStartTask = true; + listener.clusterChanged(new ClusterChangedEvent("test", createClusterState(true, true), clusterService.state())); + } + + private XPackLicenseState getAlwaysValidLicense() { + return new XPackLicenseState(() -> 0); + } + + private ClusterService createClusterService(boolean isMasterNode, boolean hasGeoIpDatabases) { + ClusterService clusterService = mock(ClusterService.class); + ClusterState state = createClusterState(isMasterNode, hasGeoIpDatabases); + when(clusterService.state()).thenReturn(state); + return clusterService; + } + + private ClusterState createClusterState(boolean isMasterNode, boolean hasGeoIpDatabases) { + String indexName = randomAlphaOfLength(5); + Index index = new Index(indexName, UUID.randomUUID().toString()); + IndexMetadata.Builder idxMeta = IndexMetadata.builder(index.getName()) + .settings(indexSettings(IndexVersion.current(), 1, 0).put("index.uuid", index.getUUID())); + String nodeId = ESTestCase.randomAlphaOfLength(8); + DiscoveryNodes.Builder discoveryNodesBuilder = DiscoveryNodes.builder().add(DiscoveryNodeUtils.create(nodeId)).localNodeId(nodeId); + if (isMasterNode) { + discoveryNodesBuilder.masterNodeId(nodeId); + } + ClusterState.Builder 
clusterStateBuilder = ClusterState.builder(new ClusterName("name")); + if (hasGeoIpDatabases) { + PersistentTasksCustomMetadata tasksCustomMetadata = new PersistentTasksCustomMetadata(1L, Map.of()); + clusterStateBuilder.metadata(Metadata.builder().putCustom(INGEST_GEOIP_CUSTOM_METADATA_TYPE, tasksCustomMetadata).put(idxMeta)); + } + return clusterStateBuilder.nodes(discoveryNodesBuilder).build(); + } + + private static class TaskStartAndRemoveMockClient extends NoOpClient { + + boolean expectStartTask; + boolean expectRemoveTask; + private boolean taskStartCalled = false; + private boolean taskRemoveCalled = false; + + private TaskStartAndRemoveMockClient(ThreadPool threadPool, boolean expectStartTask, boolean expectRemoveTask) { + super(threadPool); + this.expectStartTask = expectStartTask; + this.expectRemoveTask = expectRemoveTask; + } + + @Override + protected void doExecute( + ActionType action, + Request request, + ActionListener listener + ) { + if (action.equals(StartPersistentTaskAction.INSTANCE)) { + if (expectStartTask) { + taskStartCalled = true; + } else { + fail("Should not start task"); + } + } else if (action.equals(RemovePersistentTaskAction.INSTANCE)) { + if (expectRemoveTask) { + taskRemoveCalled = true; + } else { + fail("Should not remove task"); + } + } else { + throw new IllegalStateException("unexpected action called [" + action.name() + "]"); + } + } + + void assertTaskStartHasBeenCalled() { + assertTrue(taskStartCalled); + } + + void assertTaskRemoveHasBeenCalled() { + assertTrue(taskRemoveCalled); + } + } +} diff --git a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java index c6e926329b8fa..9eee5b0bd7a6f 100644 --- a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java +++ b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java @@ -39,6 +39,9 @@ public class Constants { "cluster:admin/indices/dangling/find", "cluster:admin/indices/dangling/import", "cluster:admin/indices/dangling/list", + "cluster:admin/ingest/geoip/database/delete", + "cluster:admin/ingest/geoip/database/get", + "cluster:admin/ingest/geoip/database/put", "cluster:admin/ingest/pipeline/delete", "cluster:admin/ingest/pipeline/get", "cluster:admin/ingest/pipeline/put", From b5a7bdfa50c422271bf6359ced576f4ad62763ce Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 18 Jul 2024 14:46:00 +1000 Subject: [PATCH 34/65] Mute org.elasticsearch.search.sort.FieldSortIT testIssue6614 #110999 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 44d21b55ce3ff..320fc00c5d943 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -108,6 +108,9 @@ tests: - class: org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests method: testInfoParameters {class org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec} issue: https://github.com/elastic/elasticsearch/issues/110981 +- class: org.elasticsearch.search.sort.FieldSortIT + method: testIssue6614 + issue: https://github.com/elastic/elasticsearch/issues/110999 # Examples: # From ff2c4dea63e21ea721a60918aa40dee1b7b8a3fe Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 18 Jul 
2024 15:21:16 +1000 Subject: [PATCH 35/65] Add support for shared input stream to RangeMissingHandler (#110855) This PR augments the RangeMissingHandler interface to support a shared input stream which is reused when filling multiple gaps. The shared input stream is meant to be consumed sequentially to fill the list of gaps in sequential order. The existing behaviour is preserved when the shared input stream is not used, i.e. when it is `null`. --- .../shared/SharedBlobCacheService.java | 173 +++++++++++++--- .../shared/SharedBlobCacheServiceTests.java | 196 +++++++++++++++--- .../store/input/FrozenIndexInput.java | 3 +- 3 files changed, 317 insertions(+), 55 deletions(-) diff --git a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java index ac22d22d5affb..9cb83e35b63d6 100644 --- a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java +++ b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java @@ -31,7 +31,9 @@ import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.core.AbstractRefCounted; import org.elasticsearch.core.Assertions; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.Strings; import org.elasticsearch.core.TimeValue; import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; @@ -41,6 +43,7 @@ import org.elasticsearch.threadpool.ThreadPool; import java.io.IOException; +import java.io.InputStream; import java.io.UncheckedIOException; import java.lang.invoke.MethodHandles; import java.lang.invoke.VarHandle; @@ -643,9 +646,10 @@ private RangeMissingHandler writerWithOffset(RangeMissingHandler writer, int wri // no need to allocate a new capturing lambda if the offset isn't adjusted return writer; } - return (channel, channelPos, relativePos, len, progressUpdater) -> writer.fillCacheRange( + return (channel, channelPos, streamFactory, relativePos, len, progressUpdater) -> writer.fillCacheRange( channel, channelPos, + streamFactory, relativePos - writeOffset, len, progressUpdater ); @@ -923,9 +927,10 @@ void populate( return; } try (var gapsListener = new RefCountingListener(listener.map(unused -> true))) { + assert writer.sharedInputStreamFactory(gaps) == null; for (SparseFileTracker.Gap gap : gaps) { executor.execute( - fillGapRunnable(gap, writer, ActionListener.releaseAfter(gapsListener.acquire(), refs.acquire())) + fillGapRunnable(gap, writer, null, ActionListener.releaseAfter(gapsListener.acquire(), refs.acquire())) ); } } @@ -968,8 +973,30 @@ void populateAndRead( ); if (gaps.isEmpty() == false) { - for (SparseFileTracker.Gap gap : gaps) { - executor.execute(fillGapRunnable(gap, writer, refs.acquireListener())); + final SourceInputStreamFactory streamFactory = writer.sharedInputStreamFactory(gaps); + logger.trace( + () -> Strings.format( + "fill gaps %s %s shared input stream factory", + gaps, + (streamFactory == null ? "without" : "with"), + (streamFactory == null ?
"" : " " + streamFactory) + ) + ); + if (streamFactory == null) { + for (SparseFileTracker.Gap gap : gaps) { + executor.execute(fillGapRunnable(gap, writer, null, refs.acquireListener())); + } + } else { + final List gapFillingTasks = gaps.stream() + .map(gap -> fillGapRunnable(gap, writer, streamFactory, refs.acquireListener())) + .toList(); + executor.execute(() -> { + try (streamFactory) { + // Fill the gaps in order. If a gap fails to fill for whatever reason, the task for filling the next + // gap will still be executed. + gapFillingTasks.forEach(Runnable::run); + } + }); } } } @@ -978,7 +1005,12 @@ void populateAndRead( } } - private AbstractRunnable fillGapRunnable(SparseFileTracker.Gap gap, RangeMissingHandler writer, ActionListener listener) { + private AbstractRunnable fillGapRunnable( + SparseFileTracker.Gap gap, + RangeMissingHandler writer, + @Nullable SourceInputStreamFactory streamFactory, + ActionListener listener + ) { return ActionRunnable.run(listener.delegateResponse((l, e) -> failGapAndListener(gap, l, e)), () -> { var ioRef = io; assert regionOwners.get(ioRef) == CacheFileRegion.this; @@ -987,6 +1019,7 @@ private AbstractRunnable fillGapRunnable(SparseFileTracker.Gap gap, RangeMissing writer.fillCacheRange( ioRef, start, + streamFactory, start, Math.toIntExact(gap.end() - start), progress -> gap.onProgress(start + progress) @@ -1072,16 +1105,21 @@ public int populateAndRead( // We are interested in the total time that the system spends when fetching a result (including time spent queuing), so we start // our measurement here. final long startTime = relativeTimeInNanosSupplier.getAsLong(); - RangeMissingHandler writerInstrumentationDecorator = ( - SharedBytes.IO channel, - int channelPos, - int relativePos, - int length, - IntConsumer progressUpdater) -> { - writer.fillCacheRange(channel, channelPos, relativePos, length, progressUpdater); - var elapsedTime = TimeUnit.NANOSECONDS.toMicros(relativeTimeInNanosSupplier.getAsLong() - startTime); - SharedBlobCacheService.this.blobCacheMetrics.getCacheMissLoadTimes().record(elapsedTime); - SharedBlobCacheService.this.blobCacheMetrics.getCacheMissCounter().increment(); + RangeMissingHandler writerInstrumentationDecorator = new DelegatingRangeMissingHandler(writer) { + @Override + public void fillCacheRange( + SharedBytes.IO channel, + int channelPos, + SourceInputStreamFactory streamFactory, + int relativePos, + int length, + IntConsumer progressUpdater + ) throws IOException { + writer.fillCacheRange(channel, channelPos, streamFactory, relativePos, length, progressUpdater); + var elapsedTime = TimeUnit.NANOSECONDS.toMicros(relativeTimeInNanosSupplier.getAsLong() - startTime); + SharedBlobCacheService.this.blobCacheMetrics.getCacheMissLoadTimes().record(elapsedTime); + SharedBlobCacheService.this.blobCacheMetrics.getCacheMissCounter().increment(); + } }; if (rangeToRead.isEmpty()) { // nothing to read, skip @@ -1165,20 +1203,36 @@ private RangeMissingHandler writerWithOffset(RangeMissingHandler writer, CacheFi // no need to allocate a new capturing lambda if the offset isn't adjusted adjustedWriter = writer; } else { - adjustedWriter = (channel, channelPos, relativePos, len, progressUpdater) -> writer.fillCacheRange( - channel, - channelPos, - relativePos - writeOffset, - len, - progressUpdater - ); + adjustedWriter = new DelegatingRangeMissingHandler(writer) { + @Override + public void fillCacheRange( + SharedBytes.IO channel, + int channelPos, + SourceInputStreamFactory streamFactory, + int relativePos, + int len, + 
IntConsumer progressUpdater + ) throws IOException { + delegate.fillCacheRange(channel, channelPos, streamFactory, relativePos - writeOffset, len, progressUpdater); + } + }; } if (Assertions.ENABLED) { - return (channel, channelPos, relativePos, len, progressUpdater) -> { - assert assertValidRegionAndLength(fileRegion, channelPos, len); - adjustedWriter.fillCacheRange(channel, channelPos, relativePos, len, progressUpdater); - assert regionOwners.get(fileRegion.io) == fileRegion - : "File chunk [" + fileRegion.regionKey + "] no longer owns IO [" + fileRegion.io + "]"; + return new DelegatingRangeMissingHandler(adjustedWriter) { + @Override + public void fillCacheRange( + SharedBytes.IO channel, + int channelPos, + SourceInputStreamFactory streamFactory, + int relativePos, + int len, + IntConsumer progressUpdater + ) throws IOException { + assert assertValidRegionAndLength(fileRegion, channelPos, len); + delegate.fillCacheRange(channel, channelPos, streamFactory, relativePos, len, progressUpdater); + assert regionOwners.get(fileRegion.io) == fileRegion + : "File chunk [" + fileRegion.regionKey + "] no longer owns IO [" + fileRegion.io + "]"; + } }; } return adjustedWriter; @@ -1240,18 +1294,79 @@ public interface RangeAvailableHandler { @FunctionalInterface public interface RangeMissingHandler { + /** + * Attempt to get a shared {@link SourceInputStreamFactory} for the given list of Gaps so that all of them + * can be filled from the input stream created from the factory. If a factory is returned, the gaps must be + * filled sequentially by calling {@link #fillCacheRange} in order with the factory. If {@code null} is returned, + * each invocation of {@link #fillCacheRange} creates its own input stream and can therefore be executed in parallel. + * @param gaps The list of gaps to be filled by fetching from source storage and writing into the cache. + * @return A factory object to be shared by all gaps filling process, or {@code null} if each gap filling should create + * its own input stream. + */ + @Nullable + default SourceInputStreamFactory sharedInputStreamFactory(List gaps) { + return null; + } + /** * Callback method used to fetch data (usually from a remote storage) and write it in the cache. * * @param channel is the cache region to write to * @param channelPos a position in the channel (cache file) to write to + * @param streamFactory factory to get the input stream positioned at the given value for the remote storage. + * This is useful for sharing the same stream across multiple calls to this method. + * If it is {@code null}, the method should open input stream on its own. * @param relativePos the relative position in the remote storage to read from * @param length of data to fetch * @param progressUpdater consumer to invoke with the number of copied bytes as they are written in cache. * This is used to notify waiting readers that data become available in cache. */ - void fillCacheRange(SharedBytes.IO channel, int channelPos, int relativePos, int length, IntConsumer progressUpdater) - throws IOException; + void fillCacheRange( + SharedBytes.IO channel, + int channelPos, + @Nullable SourceInputStreamFactory streamFactory, + int relativePos, + int length, + IntConsumer progressUpdater + ) throws IOException; + } + + /** + * Factory to create the input stream for reading data from the remote storage as the source for filling local cache regions. + */ + public interface SourceInputStreamFactory extends Releasable { + + /** + * Create the input stream at the specified position. 
+ * @param relativePos the relative position in the remote storage to read from. + * @return the input stream ready to be read from. + */ + InputStream create(int relativePos) throws IOException; + } + + private abstract static class DelegatingRangeMissingHandler implements RangeMissingHandler { + protected final RangeMissingHandler delegate; + + protected DelegatingRangeMissingHandler(RangeMissingHandler delegate) { + this.delegate = delegate; + } + + @Override + public SourceInputStreamFactory sharedInputStreamFactory(List gaps) { + return delegate.sharedInputStreamFactory(gaps); + } + + @Override + public void fillCacheRange( + SharedBytes.IO channel, + int channelPos, + SourceInputStreamFactory streamFactory, + int relativePos, + int length, + IntConsumer progressUpdater + ) throws IOException { + delegate.fillCacheRange(channel, channelPos, streamFactory, relativePos, length, progressUpdater); + } } public record Stats( diff --git a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java index 5d341897de57b..e477673c90d6d 100644 --- a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java +++ b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java @@ -14,6 +14,9 @@ import org.elasticsearch.blobcache.BlobCacheMetrics; import org.elasticsearch.blobcache.BlobCacheUtils; import org.elasticsearch.blobcache.common.ByteRange; +import org.elasticsearch.blobcache.common.SparseFileTracker; +import org.elasticsearch.blobcache.shared.SharedBlobCacheService.RangeMissingHandler; +import org.elasticsearch.blobcache.shared.SharedBlobCacheService.SourceInputStreamFactory; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; @@ -23,6 +26,7 @@ import org.elasticsearch.common.unit.RatioValue; import org.elasticsearch.common.unit.RelativeByteSizeValue; import org.elasticsearch.common.util.concurrent.DeterministicTaskQueue; +import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.util.concurrent.StoppableExecutorServiceWrapper; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.env.Environment; @@ -34,6 +38,7 @@ import org.elasticsearch.threadpool.ThreadPool; import java.io.IOException; +import java.io.InputStream; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -43,8 +48,11 @@ import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.IntConsumer; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -53,7 +61,10 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.sameInstance; public class SharedBlobCacheServiceTests extends ESTestCase { @@ -104,7 +115,7 @@ 
public void testBasicEviction() throws IOException { ByteRange.of(0L, 1L), ByteRange.of(0L, 1L), (channel, channelPos, relativePos, length) -> 1, - (channel, channelPos, relativePos, length, progressUpdater) -> progressUpdater.accept(length), + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> progressUpdater.accept(length), taskQueue.getThreadPool().generic(), bytesReadFuture ); @@ -538,10 +549,17 @@ public void execute(Runnable command) { final long size = size(250); AtomicLong bytesRead = new AtomicLong(size); final PlainActionFuture future = new PlainActionFuture<>(); - cacheService.maybeFetchFullEntry(cacheKey, size, (channel, channelPos, relativePos, length, progressUpdater) -> { - bytesRead.addAndGet(-length); - progressUpdater.accept(length); - }, bulkExecutor, future); + cacheService.maybeFetchFullEntry( + cacheKey, + size, + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> { + assert streamFactory == null : streamFactory; + bytesRead.addAndGet(-length); + progressUpdater.accept(length); + }, + bulkExecutor, + future + ); future.get(10, TimeUnit.SECONDS); assertEquals(0L, bytesRead.get()); @@ -552,7 +570,7 @@ public void execute(Runnable command) { // a download that would use up all regions should not run final var cacheKey = generateCacheKey(); assertEquals(2, cacheService.freeRegionCount()); - var configured = cacheService.maybeFetchFullEntry(cacheKey, size(500), (ch, chPos, relPos, len, update) -> { + var configured = cacheService.maybeFetchFullEntry(cacheKey, size(500), (ch, chPos, streamFactory, relPos, len, update) -> { throw new AssertionError("Should never reach here"); }, bulkExecutor, ActionListener.noop()); assertFalse(configured); @@ -595,7 +613,9 @@ public void testFetchFullCacheEntryConcurrently() throws Exception { (ActionListener listener) -> cacheService.maybeFetchFullEntry( cacheKey, size, - (channel, channelPos, relativePos, length, progressUpdater) -> progressUpdater.accept(length), + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> progressUpdater.accept( + length + ), bulkExecutor, listener ) @@ -839,7 +859,7 @@ public void testMaybeEvictLeastUsed() throws Exception { var entry = cacheService.get(cacheKey, regionSize, 0); entry.populate( ByteRange.of(0L, regionSize), - (channel, channelPos, relativePos, length, progressUpdater) -> progressUpdater.accept(length), + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> progressUpdater.accept(length), taskQueue.getThreadPool().generic(), ActionListener.noop() ); @@ -930,10 +950,18 @@ public void execute(Runnable command) { final long blobLength = size(250); // 3 regions AtomicLong bytesRead = new AtomicLong(0L); final PlainActionFuture future = new PlainActionFuture<>(); - cacheService.maybeFetchRegion(cacheKey, 0, blobLength, (channel, channelPos, relativePos, length, progressUpdater) -> { - bytesRead.addAndGet(length); - progressUpdater.accept(length); - }, bulkExecutor, future); + cacheService.maybeFetchRegion( + cacheKey, + 0, + blobLength, + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> { + assert streamFactory == null : streamFactory; + bytesRead.addAndGet(length); + progressUpdater.accept(length); + }, + bulkExecutor, + future + ); var fetched = future.get(10, TimeUnit.SECONDS); assertThat("Region has been fetched", fetched, is(true)); @@ -957,7 +985,8 @@ public void execute(Runnable command) { cacheKey, region, blobLength, - (channel, channelPos, relativePos, 
length, progressUpdater) -> { + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> { + assert streamFactory == null : streamFactory; bytesRead.addAndGet(length); progressUpdater.accept(length); }, @@ -981,7 +1010,7 @@ public void execute(Runnable command) { cacheKey, randomIntBetween(0, 10), randomLongBetween(1L, regionSize), - (channel, channelPos, relativePos, length, progressUpdater) -> { + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> { throw new AssertionError("should not be executed"); }, bulkExecutor, @@ -999,10 +1028,18 @@ public void execute(Runnable command) { long blobLength = randomLongBetween(1L, regionSize); AtomicLong bytesRead = new AtomicLong(0L); final PlainActionFuture future = new PlainActionFuture<>(); - cacheService.maybeFetchRegion(cacheKey, 0, blobLength, (channel, channelPos, relativePos, length, progressUpdater) -> { - bytesRead.addAndGet(length); - progressUpdater.accept(length); - }, bulkExecutor, future); + cacheService.maybeFetchRegion( + cacheKey, + 0, + blobLength, + (channel, channelPos, ignore, relativePos, length, progressUpdater) -> { + assert ignore == null : ignore; + bytesRead.addAndGet(length); + progressUpdater.accept(length); + }, + bulkExecutor, + future + ); var fetched = future.get(10, TimeUnit.SECONDS); assertThat("Region has been fetched", fetched, is(true)); @@ -1073,7 +1110,7 @@ public void execute(Runnable command) { region, range, blobLength, - (channel, channelPos, relativePos, length, progressUpdater) -> { + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> { assertThat(range.start() + relativePos, equalTo(cacheService.getRegionStart(region) + regionRange.start())); assertThat(channelPos, equalTo(Math.toIntExact(regionRange.start()))); assertThat(length, equalTo(Math.toIntExact(regionRange.length()))); @@ -1113,7 +1150,7 @@ public void execute(Runnable command) { region, ByteRange.of(0L, blobLength), blobLength, - (channel, channelPos, relativePos, length, progressUpdater) -> bytesCopied.addAndGet(length), + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> bytesCopied.addAndGet(length), bulkExecutor, listener ); @@ -1136,7 +1173,7 @@ public void execute(Runnable command) { randomIntBetween(0, 10), ByteRange.of(0L, blobLength), blobLength, - (channel, channelPos, relativePos, length, progressUpdater) -> { + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> { throw new AssertionError("should not be executed"); }, bulkExecutor, @@ -1159,7 +1196,7 @@ public void execute(Runnable command) { 0, ByteRange.of(0L, blobLength), blobLength, - (channel, channelPos, relativePos, length, progressUpdater) -> bytesCopied.addAndGet(length), + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> bytesCopied.addAndGet(length), bulkExecutor, future ); @@ -1200,7 +1237,7 @@ public void testPopulate() throws Exception { var entry = cacheService.get(cacheKey, blobLength, 0); AtomicLong bytesWritten = new AtomicLong(0L); final PlainActionFuture future1 = new PlainActionFuture<>(); - entry.populate(ByteRange.of(0, regionSize - 1), (channel, channelPos, relativePos, length, progressUpdater) -> { + entry.populate(ByteRange.of(0, regionSize - 1), (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> { bytesWritten.addAndGet(length); progressUpdater.accept(length); }, taskQueue.getThreadPool().generic(), future1); @@ -1211,7 +1248,7 @@ public void testPopulate() throws 
Exception { // start populating the second region entry = cacheService.get(cacheKey, blobLength, 1); final PlainActionFuture future2 = new PlainActionFuture<>(); - entry.populate(ByteRange.of(0, regionSize - 1), (channel, channelPos, relativePos, length, progressUpdater) -> { + entry.populate(ByteRange.of(0, regionSize - 1), (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> { bytesWritten.addAndGet(length); progressUpdater.accept(length); }, taskQueue.getThreadPool().generic(), future2); @@ -1219,7 +1256,7 @@ public void testPopulate() throws Exception { // start populating again the first region, listener should be called immediately entry = cacheService.get(cacheKey, blobLength, 0); final PlainActionFuture future3 = new PlainActionFuture<>(); - entry.populate(ByteRange.of(0, regionSize - 1), (channel, channelPos, relativePos, length, progressUpdater) -> { + entry.populate(ByteRange.of(0, regionSize - 1), (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> { bytesWritten.addAndGet(length); progressUpdater.accept(length); }, taskQueue.getThreadPool().generic(), future3); @@ -1306,4 +1343,113 @@ protected int computeCacheFileRegionSize(long fileLength, int region) { } } } + + public void testSharedSourceInputStreamFactory() throws Exception { + final long regionSizeInBytes = size(100); + final Settings settings = Settings.builder() + .put(NODE_NAME_SETTING.getKey(), "node") + .put(SharedBlobCacheService.SHARED_CACHE_SIZE_SETTING.getKey(), ByteSizeValue.ofBytes(size(200)).getStringRep()) + .put(SharedBlobCacheService.SHARED_CACHE_REGION_SIZE_SETTING.getKey(), ByteSizeValue.ofBytes(regionSizeInBytes).getStringRep()) + .put("path.home", createTempDir()) + .build(); + final ThreadPool threadPool = new TestThreadPool("test"); + try ( + NodeEnvironment environment = new NodeEnvironment(settings, TestEnvironment.newEnvironment(settings)); + var cacheService = new SharedBlobCacheService<>( + environment, + settings, + threadPool, + ThreadPool.Names.GENERIC, + BlobCacheMetrics.NOOP + ) + ) { + final var cacheKey = generateCacheKey(); + assertEquals(2, cacheService.freeRegionCount()); + final var region = cacheService.get(cacheKey, size(250), 0); + assertEquals(regionSizeInBytes, region.tracker.getLength()); + + // Read disjoint ranges to create holes in the region + final long interval = regionSizeInBytes / between(5, 20); + for (var start = interval; start < regionSizeInBytes - 2 * SharedBytes.PAGE_SIZE; start += interval) { + final var range = ByteRange.of(start, start + SharedBytes.PAGE_SIZE); + final PlainActionFuture future = new PlainActionFuture<>(); + region.populateAndRead( + range, + range, + (channel, channelPos, relativePos, length) -> length, + (channel, channelPos, streamFactory, relativePos, length, progressUpdater) -> progressUpdater.accept(length), + EsExecutors.DIRECT_EXECUTOR_SERVICE, + future + ); + safeGet(future); + } + + // Read the entire region with a shared source input stream and we want to ensure the following behaviours + // 1. fillCacheRange is invoked as many times as the number of holes/gaps + // 2. fillCacheRange is invoked single threaded with the gap order + // 3. The shared streamFactory is passed to each invocation + // 4. 
The factory is closed at the end + final int numberGaps = region.tracker.getCompletedRanges().size() + 1; + final var invocationCounter = new AtomicInteger(); + final var factoryClosed = new AtomicBoolean(false); + final var dummyStreamFactory = new SourceInputStreamFactory() { + @Override + public InputStream create(int relativePos) { + return null; + } + + @Override + public void close() { + factoryClosed.set(true); + } + }; + + final var rangeMissingHandler = new RangeMissingHandler() { + final AtomicReference invocationThread = new AtomicReference<>(); + final AtomicInteger position = new AtomicInteger(-1); + + @Override + public SourceInputStreamFactory sharedInputStreamFactory(List gaps) { + return dummyStreamFactory; + } + + @Override + public void fillCacheRange( + SharedBytes.IO channel, + int channelPos, + SourceInputStreamFactory streamFactory, + int relativePos, + int length, + IntConsumer progressUpdater + ) throws IOException { + if (invocationCounter.incrementAndGet() == 1) { + final Thread witness = invocationThread.compareAndExchange(null, Thread.currentThread()); + assertThat(witness, nullValue()); + } else { + assertThat(invocationThread.get(), sameInstance(Thread.currentThread())); + } + assertThat(streamFactory, sameInstance(dummyStreamFactory)); + assertThat(position.getAndSet(relativePos), lessThan(relativePos)); + progressUpdater.accept(length); + } + }; + + final var range = ByteRange.of(0, regionSizeInBytes); + final PlainActionFuture future = new PlainActionFuture<>(); + region.populateAndRead( + range, + range, + (channel, channelPos, relativePos, length) -> length, + rangeMissingHandler, + threadPool.generic(), + future + ); + safeGet(future); + assertThat(invocationCounter.get(), equalTo(numberGaps)); + assertThat(region.tracker.checkAvailable(regionSizeInBytes), is(true)); + assertBusy(() -> assertThat(factoryClosed.get(), is(true))); + } finally { + threadPool.shutdown(); + } + } } diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/FrozenIndexInput.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/FrozenIndexInput.java index 931e8790f98c6..56efc72f2f6f7 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/FrozenIndexInput.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/FrozenIndexInput.java @@ -146,7 +146,8 @@ private void readWithoutBlobCacheSlow(ByteBuffer b, long position, int length) t final int read = SharedBytes.readCacheFile(channel, pos, relativePos, len, byteBufferReference); stats.addCachedBytesRead(read); return read; - }, (channel, channelPos, relativePos, len, progressUpdater) -> { + }, (channel, channelPos, streamFactory, relativePos, len, progressUpdater) -> { + assert streamFactory == null : streamFactory; final long startTimeNanos = stats.currentTimeNanos(); try (InputStream input = openInputStreamFromBlobStore(rangeToWrite.start() + relativePos, len)) { assert ThreadPool.assertCurrentThreadPool(SearchableSnapshots.CACHE_FETCH_ASYNC_THREAD_POOL_NAME); From 7ba5b4fb1665e058288bfd24a0dcf9f5f52e2c7c Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 18 Jul 2024 08:11:58 +0100 Subject: [PATCH 36/65] Remove unused `AsyncSnapshotInfo#getSnapshotId` (#111005) This method became unused with #110957. 
--- .../get/TransportGetSnapshotsAction.java | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java index d36cf7bf08b1f..384a004861776 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java @@ -321,12 +321,6 @@ private void ensureRequiredNamesPresent(String repositoryName, @Nullable Reposit * An asynchronous supplier of a {@link SnapshotInfo}. */ private interface AsyncSnapshotInfo { - - /** - * @return the {@link SnapshotId} of the {@link SnapshotInfo} to be retrieved. - */ - SnapshotId getSnapshotId(); - /** * @param listener completed, possibly asynchronously, with the appropriate {@link SnapshotInfo}. */ @@ -338,11 +332,6 @@ private interface AsyncSnapshotInfo { */ private AsyncSnapshotInfo forSnapshotInProgress(SnapshotsInProgress.Entry snapshotInProgress) { return new AsyncSnapshotInfo() { - @Override - public SnapshotId getSnapshotId() { - return snapshotInProgress.snapshot().getSnapshotId(); - } - @Override public void getSnapshotInfo(ActionListener listener) { final var snapshotInfo = SnapshotInfo.inProgress(snapshotInProgress); @@ -366,11 +355,6 @@ private AsyncSnapshotInfo forCompletedSnapshot( Map> indicesLookup ) { return new AsyncSnapshotInfo() { - @Override - public SnapshotId getSnapshotId() { - return snapshotId; - } - @Override public void getSnapshotInfo(ActionListener listener) { if (verbose) { From b535df78dfdeddf38076fa65b2aa202503f11bf3 Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Thu, 18 Jul 2024 08:41:00 +0100 Subject: [PATCH 37/65] [DOCS] Retrievers and rerankers (#110007) Co-authored-by: Adam Demjen --- docs/reference/search/retriever.asciidoc | 67 ++++++++ .../retrievers-reranking/index.asciidoc | 8 + .../retrievers-overview.asciidoc | 71 ++++---- .../semantic-reranking.asciidoc | 151 ++++++++++++++++++ .../search-your-data.asciidoc | 2 +- 5 files changed, 261 insertions(+), 38 deletions(-) create mode 100644 docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc rename docs/reference/search/search-your-data/{ => retrievers-reranking}/retrievers-overview.asciidoc (75%) create mode 100644 docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 590df272cc89e..ed39ac786880b 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -28,6 +28,9 @@ A <> that replaces the functionality of a <> that produces top documents from <>. +`text_similarity_reranker`:: +A <> that enhances search results by re-ranking documents based on semantic similarity to a specified inference text, using a machine learning model. + [[standard-retriever]] ==== Standard Retriever @@ -201,6 +204,70 @@ GET /index/_search ---- // NOTCONSOLE +[[text-similarity-reranker-retriever]] +==== Text Similarity Re-ranker Retriever + +The `text_similarity_reranker` is a type of retriever that enhances search results by re-ranking documents based on semantic similarity to a specified inference text, using a machine learning model. 
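A `rerank` inference endpoint must exist before this retriever can reference it. As a rough, untested sketch (assuming the Cohere service integration; the endpoint name, model name, and API key below are placeholders only), such an endpoint could be created like this:

[source,console]
----
PUT _inference/rerank/my-cohere-rerank-model
{
  "service": "cohere",
  "service_settings": {
    "api_key": "<cohere-api-key>",
    "model_id": "rerank-english-v3.0"
  }
}
----
// TEST[skip:TBD]

The `inference_id` parameter shown in the example further down would then point at this endpoint.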
+ +===== Prerequisites + +To use `text_similarity_reranker` you must first set up a `rerank` task using the <>. +The `rerank` task should be set up with a machine learning model that can compute text similarity. +Currently you can integrate directly with the Cohere Rerank endpoint using the <> task, or upload a model to {es} <>. + +===== Parameters + +`field`:: +(Required, `string`) ++ +The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the `inferenceText`. + +`inference_id`:: +(Required, `string`) ++ +Unique identifier of the inference endpoint created using the {infer} API. + +`inference_text`:: +(Required, `string`) ++ +The text snippet used as the basis for similarity comparison. + +`rank_window_size`:: +(Optional, `int`) ++ +The number of top documents to consider in the re-ranking process. Defaults to `10`. + +`min_score`:: +(Optional, `float`) ++ +Sets a minimum threshold score for including documents in the re-ranked results. Documents with similarity scores below this threshold will be excluded. Note that score calculations vary depending on the model used. + +===== Restrictions + +A text similarity re-ranker retriever is a compound retriever. Child retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. + +===== Example + +[source,js] +---- +GET /index/_search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "standard": { ... } + } + }, + "field": "text", + "inference_id": "my-cohere-rerank-model", + "inference_text": "Most famous landmark in Paris", + "rank_window_size": 100, + "min_score": 0.5 + } +} +---- +// NOTCONSOLE + ==== Using `from` and `size` with a retriever tree The <> and <> diff --git a/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc new file mode 100644 index 0000000000000..87ed52e365370 --- /dev/null +++ b/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc @@ -0,0 +1,8 @@ +[[retrievers-reranking-overview]] +== Retrievers and reranking + +* <> +* <> + +include::retrievers-overview.asciidoc[] +include::semantic-reranking.asciidoc[] diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc similarity index 75% rename from docs/reference/search/search-your-data/retrievers-overview.asciidoc rename to docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc index 92cd085583916..99659ae76e092 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc @@ -1,7 +1,5 @@ [[retrievers-overview]] -== Retrievers - -// Will move to a top level "Retrievers and reranking" section once reranking is live +=== Retrievers preview::[] @@ -15,33 +13,32 @@ For implementation details, including notable restrictions, check out the [discrete] [[retrievers-overview-types]] -=== Retriever types +==== Retriever types Retrievers come in various types, each tailored for different search operations. The following retrievers are currently available: -* <>. -Returns top documents from a traditional https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl.html[query]. -Mimics a traditional query but in the context of a retriever framework. 
-This ensures backward compatibility as existing `_search` requests remain supported. -That way you can transition to the new abstraction at your own pace without mixing syntaxes. -* <>. -Returns top documents from a <>, in the context of a retriever framework. -* <>. -Combines and ranks multiple first-stage retrievers using the reciprocal rank fusion (RRF) algorithm. -Allows you to combine multiple result sets with different relevance indicators into a single result set. -An RRF retriever is a *compound retriever*, where its `filter` element is propagated to its sub retrievers. +* <>. Returns top documents from a +traditional https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl.html[query]. +Mimics a traditional query but in the context of a retriever framework. This +ensures backward compatibility as existing `_search` requests remain supported. +That way you can transition to the new abstraction at your own pace without +mixing syntaxes. +* <>. Returns top documents from a <>, +in the context of a retriever framework. +* <>. Combines and ranks multiple first-stage retrievers using +the reciprocal rank fusion (RRF) algorithm. Allows you to combine multiple result sets +with different relevance indicators into a single result set. +An RRF retriever is a *compound retriever*, where its `filter` element is +propagated to its sub retrievers. + Sub retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. See the <> for detailed examples and information on how to use the RRF retriever. - -[NOTE] -==== -Stay tuned for more retriever types in future releases! -==== +* <>. Used for <>. +Requires first creating a `rerank` task using the <>. [discrete] -=== What makes retrievers useful? +==== What makes retrievers useful? Here's an overview of what makes retrievers useful and how they differ from regular queries. @@ -73,7 +70,7 @@ When using compound retrievers, only the query element is allowed, which enforce [discrete] [[retrievers-overview-example]] -=== Example +==== Example The following example demonstrates how using retrievers simplify the composability of queries for RRF ranking. @@ -154,25 +151,23 @@ GET example-index/_search [discrete] [[retrievers-overview-glossary]] -=== Glossary +==== Glossary Here are some important terms: -* *Retrieval Pipeline*. -Defines the entire retrieval and ranking logic to produce top hits. -* *Retriever Tree*. -A hierarchical structure that defines how retrievers interact. -* *First-stage Retriever*. -Returns an initial set of candidate documents. -* *Compound Retriever*. -Builds on one or more retrievers, enhancing document retrieval and ranking logic. -* *Combiners*. -Compound retrievers that merge top hits from multiple sub-retrievers. -//* NOT YET *Rerankers*. Special compound retrievers that reorder hits and may adjust the number of hits, with distinctions between first-stage and second-stage rerankers. +* *Retrieval Pipeline*. Defines the entire retrieval and ranking logic to +produce top hits. +* *Retriever Tree*. A hierarchical structure that defines how retrievers interact. +* *First-stage Retriever*. Returns an initial set of candidate documents. +* *Compound Retriever*. Builds on one or more retrievers, +enhancing document retrieval and ranking logic. +* *Combiners*. Compound retrievers that merge top hits +from multiple sub-retrievers. +* *Rerankers*. 
Special compound retrievers that reorder hits and may adjust the number of hits, with distinctions between first-stage and second-stage rerankers. [discrete] [[retrievers-overview-play-in-search]] -=== Retrievers in action +==== Retrievers in action The Search Playground builds Elasticsearch queries using the retriever abstraction. It automatically detects the fields and types in your index and builds a retriever tree based on your selections. @@ -180,7 +175,9 @@ It automatically detects the fields and types in your index and builds a retriev You can use the Playground to experiment with different retriever configurations and see how they affect search results. Refer to the {kibana-ref}/playground.html[Playground documentation] for more information. -// Content coming in https://github.com/elastic/kibana/pull/182692 - +[discrete] +[[retrievers-overview-api-reference]] +==== API reference + +For implementation details, including notable restrictions, check out the <> in the Search API docs. \ No newline at end of file diff --git a/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc new file mode 100644 index 0000000000000..75c06aa953302 --- /dev/null +++ b/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc @@ -0,0 +1,151 @@ +[[semantic-reranking]] +=== Semantic reranking + +preview::[] + +[TIP] +==== +This overview focuses more on the high-level concepts and use cases for semantic reranking. For full implementation details on how to set up and use semantic reranking in {es}, see the <> in the Search API docs. +==== + +Rerankers improve the relevance of results from earlier-stage retrieval mechanisms. +_Semantic_ rerankers use machine learning models to reorder search results based on their semantic similarity to a query. + +First-stage retrievers and rankers must be very fast and efficient because they process either the entire corpus, or all matching documents. +In a multi-stage pipeline, you can progressively use more computationally intensive ranking functions and techniques, as they will operate on smaller result sets at each step. +This helps avoid query latency degradation and keeps costs manageable. + +Semantic reranking requires relatively large and complex machine learning models and operates in real-time in response to queries. +This technique makes sense on a small _top-k_ result set, as one of the final steps in a pipeline. +This is a powerful technique for improving search relevance that works equally well with keyword, semantic, or hybrid retrieval algorithms. + +The next sections provide more details on the benefits, use cases, and model types used for semantic reranking. +The final sections include a practical, high-level overview of how to implement <> and links to the full reference documentation. + +[discrete] +[[semantic-reranking-use-cases]] +==== Use cases + +Semantic reranking enables a variety of use cases: + +* *Lexical (BM25) retrieval results reranking* +** Out-of-the-box semantic search by adding a simple API call to any lexical/BM25 retrieval pipeline. +** Adds semantic search capabilities on top of existing indices without reindexing, perfect for quick improvements. +** Ideal for environments with complex existing indices.
+ +* *Semantic retrieval results reranking* +** Improves results from semantic retrievers using ELSER sparse vector embeddings or dense vector embeddings by using more powerful models. +** Adds a refinement layer on top of hybrid retrieval with <>. + +* *General applications* +** Supports automatic and transparent chunking, eliminating the need for pre-chunking at index time. +** Provides explicit control over document relevance in retrieval-augmented generation (RAG) use cases or other scenarios involving large language model (LLM) inputs. + +Now that we've outlined the value of semantic reranking, we'll explore the specific models that power this process and how they differ. + +[discrete] +[[semantic-reranking-models]] +==== Cross-encoder and bi-encoder models + +At a high level, two model types are used for semantic reranking: cross-encoders and bi-encoders. + +NOTE: In this version, {es} *only supports cross-encoders* for semantic reranking. + +* A *cross-encoder model* can be thought of as a more powerful, all-in-one solution, because it generates query-aware document representations. +It takes the query and document texts as a single, concatenated input. +* A *bi-encoder model* takes as input either document or query text. +Documents and query embeddings are computed separately, so they aren't aware of each other. +** To compute a ranking score, an external operation is required. This typically involves computing dot-product or cosine similarity between the query and document embeddings. + +In brief, cross-encoders provide high accuracy but are more resource-intensive. +Bi-encoders are faster and more cost-effective but less precise. + +In future versions, {es} will also support bi-encoders. +If you're interested in a more detailed analysis of the practical differences between cross-encoders and bi-encoders, untoggle the next section. + +.Comparisons between cross-encoder and bi-encoder +[%collapsible] +============== +The following is a non-exhaustive list of considerations when choosing between cross-encoders and bi-encoders for semantic reranking: + +* Because a cross-encoder model simultaneously processes both query and document texts, it can better infer their relevance, making it more effective as a reranker than a bi-encoder. +* Cross-encoder models are generally larger and more computationally intensive, resulting in higher latencies and increased computational costs. +* There are significantly fewer open-source cross-encoders, while bi-encoders offer a wide variety of sizes, languages, and other trade-offs. +* The effectiveness of cross-encoders can also improve the relevance of semantic retrievers. +For example, their ability to take word order into account can improve on dense or sparse embedding retrieval. +* When trained in tandem with specific retrievers (like lexical/BM25), cross-encoders can “correct” typical errors made by those retrievers. +* Cross-encoders output scores that are consistent across queries. +This enables you to maintain high relevance in result sets, by setting a minimum score threshold for all queries. +For example, this is important when using results in a RAG workflow or if you're otherwise feeding results to LLMs. +Note that similarity scores from bi-encoders/embedding similarities are _query-dependent_, meaning you cannot set universal cut-offs. +* Bi-encoders rerank using embeddings. You can improve your reranking latency by creating embeddings at ingest-time.
These embeddings can be stored for reranking without being indexed for retrieval, reducing your memory footprint. +============== + +[discrete] +[[semantic-reranking-in-es]] +==== Semantic reranking in {es} + +In {es}, semantic rerankers are implemented using the {es} <> and a <>. + +To use semantic reranking in {es}, you need to: + +. Choose a reranking model. In addition to cross-encoder models running on {es} inference nodes, we also expose external models and services via the Inference API to semantic rerankers. +** This includes cross-encoder models running in https://huggingface.co/inference-endpoints[HuggingFace Inference Endpoints] and the https://cohere.com/rerank[Cohere Rerank API]. +. Create a `rerank` task using the <>. +The Inference API creates an inference endpoint and configures your chosen machine learning model to perform the reranking task. +. Define a `text_similarity_reranker` retriever in your search request. +The retriever syntax makes it simple to configure both the retrieval and reranking of search results in a single API call. + +.*Example search request* with semantic reranker +[%collapsible] +============== +The following example shows a search request that uses a semantic reranker to reorder the top-k documents based on their semantic similarity to the query. +[source,console] +---- +POST _search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "match": { + "text": "How often does the moon hide the sun?" + } + } + } + }, + "field": "text", + "inference_id": "my-cohere-rerank-model", + "inference_text": "How often does the moon hide the sun?", + "rank_window_size": 100, + "min_score": 0.5 + } + } +} +---- +// TEST[skip:TBD] +============== + +[discrete] +[[semantic-reranking-types]] +==== Supported reranking types + +The following `text_similarity_reranker` model configuration options are available. + +*Text similarity with cross-encoder* + +This solution uses a hosted or 3rd party inference service which relies on a cross-encoder model. +The model receives the text fields from the _top-K_ documents, as well as the search query, and calculates scores directly, which are then used to rerank the documents. + +Used with the Cohere inference service rolled out in 8.13, turn on semantic reranking that works out of the box. +Check out our https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/cohere/cohere-elasticsearch.ipynb[Python notebook] for using Cohere with {es}. + +[discrete] +[[semantic-reranking-learn-more]] +==== Learn more + +* Read the <> for syntax and implementation details +* Learn more about the <> abstraction +* Learn more about the Elastic <> +* Check out our https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/cohere/cohere-elasticsearch.ipynb[Python notebook] for using Cohere with {es} \ No newline at end of file diff --git a/docs/reference/search/search-your-data/search-your-data.asciidoc b/docs/reference/search/search-your-data/search-your-data.asciidoc index e1c1618410f2f..a885df2f2179e 100644 --- a/docs/reference/search/search-your-data/search-your-data.asciidoc +++ b/docs/reference/search/search-your-data/search-your-data.asciidoc @@ -45,7 +45,7 @@ results directly in the Kibana Search UI. 
include::search-api.asciidoc[] include::knn-search.asciidoc[] include::semantic-search.asciidoc[] -include::retrievers-overview.asciidoc[] +include::retrievers-reranking/index.asciidoc[] include::learning-to-rank.asciidoc[] include::search-across-clusters.asciidoc[] include::search-with-synonyms.asciidoc[] From ebdcf4497b4c1e74cccb47ef241540f88155a79c Mon Sep 17 00:00:00 2001 From: Tim Vernum Date: Thu, 18 Jul 2024 17:46:26 +1000 Subject: [PATCH 38/65] Update logging for role mappings (#110870) Fixes the formatter for log messages in ExpressionRoleMapping and adds more logging in ClusterStateRoleMapper and NativeRoleMappingStore --- .../security/authc/support/mapper/ExpressionRoleMapping.java | 4 ++-- .../security/authc/support/mapper/ClusterStateRoleMapper.java | 4 +++- .../security/authc/support/mapper/NativeRoleMappingStore.java | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/mapper/ExpressionRoleMapping.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/mapper/ExpressionRoleMapping.java index 461619f2279f6..17088cff8718b 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/mapper/ExpressionRoleMapping.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/mapper/ExpressionRoleMapping.java @@ -91,12 +91,12 @@ public static Set resolveRoles( .flatMap(m -> { Set roleNames = m.getRoleNames(scriptService, model); logger.trace( - () -> format("Applying role-mapping [{}] to user-model [{}] produced role-names [{}]", m.getName(), model, roleNames) + () -> format("Applying role-mapping [%s] to user-model [%s] produced role-names [%s]", m.getName(), model, roleNames) ); return roleNames.stream(); }) .collect(Collectors.toSet()); - logger.debug(() -> format("Mapping user [{}] to roles [{}]", user, roles)); + logger.debug(() -> format("Mapping user [%s] to roles [%s]", user, roles)); return roles; } diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/support/mapper/ClusterStateRoleMapper.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/support/mapper/ClusterStateRoleMapper.java index a31da43021c89..9a6e9e75c4685 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/support/mapper/ClusterStateRoleMapper.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/support/mapper/ClusterStateRoleMapper.java @@ -85,7 +85,9 @@ private Set getMappings() { if (enabled == false) { return Set.of(); } else { - return RoleMappingMetadata.getFromClusterState(clusterService.state()).getRoleMappings(); + final Set mappings = RoleMappingMetadata.getFromClusterState(clusterService.state()).getRoleMappings(); + logger.trace("Retrieved [{}] mapping(s) from cluster state", mappings.size()); + return mappings; } } } diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/support/mapper/NativeRoleMappingStore.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/support/mapper/NativeRoleMappingStore.java index 7f35415d6f630..beabf93e80e0d 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/support/mapper/NativeRoleMappingStore.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/support/mapper/NativeRoleMappingStore.java @@ -397,6 
+397,7 @@ public void onSecurityIndexStateChange(SecurityIndexManager.State previousState, @Override public void resolveRoles(UserData user, ActionListener> listener) { getRoleMappings(null, ActionListener.wrap(mappings -> { + logger.trace("Retrieved [{}] role mapping(s) from security index", mappings.size()); listener.onResponse(ExpressionRoleMapping.resolveRoles(user, mappings, scriptService, logger)); }, listener::onFailure)); } From ac1cedadda2163d668dc16c00b5f4c35b86e502d Mon Sep 17 00:00:00 2001 From: Salvatore Campagna <93581129+salvatore-campagna@users.noreply.github.com> Date: Thu, 18 Jul 2024 09:50:06 +0200 Subject: [PATCH 39/65] Extract method and remove unused variable (#110975) --- ...ardVersusLogsIndexModeChallengeRestIT.java | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java index eb98e4facf850..7c209ba855544 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java @@ -132,9 +132,7 @@ public void testMatchAllQuery() throws IOException, MatcherException { documents.add(generateDocument(Instant.now().plus(i, ChronoUnit.SECONDS))); } - final Tuple tuple = indexDocuments(() -> documents, () -> documents); - assertThat(tuple.v1().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); - assertThat(tuple.v2().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); + assertDocumentIndexing(documents); final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(QueryBuilders.matchAllQuery()) .size(numberOfDocuments); @@ -150,13 +148,10 @@ public void testTermsQuery() throws IOException, MatcherException { final List documents = new ArrayList<>(); int numberOfDocuments = randomIntBetween(100, 200); for (int i = 0; i < numberOfDocuments; i++) { - final String method = randomFrom("put", "post", "get"); documents.add(generateDocument(Instant.now().plus(i, ChronoUnit.SECONDS))); } - final Tuple tuple = indexDocuments(() -> documents, () -> documents); - assertThat(tuple.v1().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); - assertThat(tuple.v2().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); + assertDocumentIndexing(documents); final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(QueryBuilders.termQuery("method", "put")) .size(numberOfDocuments); @@ -175,9 +170,7 @@ public void testHistogramAggregation() throws IOException, MatcherException { documents.add(generateDocument(Instant.now().plus(i, ChronoUnit.SECONDS))); } - final Tuple tuple = indexDocuments(() -> documents, () -> documents); - assertThat(tuple.v1().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); - assertThat(tuple.v2().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); + assertDocumentIndexing(documents); final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(QueryBuilders.matchAllQuery()) .size(numberOfDocuments) @@ -197,9 +190,7 @@ public void testTermsAggregation() 
throws IOException, MatcherException { documents.add(generateDocument(Instant.now().plus(i, ChronoUnit.SECONDS))); } - final Tuple tuple = indexDocuments(() -> documents, () -> documents); - assertThat(tuple.v1().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); - assertThat(tuple.v2().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); + assertDocumentIndexing(documents); final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(QueryBuilders.matchAllQuery()) .size(0) @@ -219,9 +210,7 @@ public void testDateHistogramAggregation() throws IOException, MatcherException documents.add(generateDocument(Instant.now().plus(i, ChronoUnit.SECONDS))); } - final Tuple tuple = indexDocuments(() -> documents, () -> documents); - assertThat(tuple.v1().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); - assertThat(tuple.v2().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); + assertDocumentIndexing(documents); final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(QueryBuilders.matchAllQuery()) .aggregation(AggregationBuilders.dateHistogram("agg").field("@timestamp").calendarInterval(DateHistogramInterval.SECOND)) @@ -261,4 +250,10 @@ private static List> getAggregationBuckets(final Response re return (List>) agg.get("buckets"); } + private void assertDocumentIndexing(List documents) throws IOException { + final Tuple tuple = indexDocuments(() -> documents, () -> documents); + assertThat(tuple.v1().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); + assertThat(tuple.v2().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); + } + } From 6191fe3b16a28af184c4b0a31a34f7dbb14bb2cd Mon Sep 17 00:00:00 2001 From: Carlos Delgado <6339205+carlosdelest@users.noreply.github.com> Date: Thu, 18 Jul 2024 10:20:26 +0200 Subject: [PATCH 40/65] Clarify synonyms docs (#110822) --- .../synonym-graph-tokenfilter.asciidoc | 135 +++++++++++------ .../tokenfilters/synonym-tokenfilter.asciidoc | 139 ++++++++++++------ .../tokenfilters/synonyms-format.asciidoc | 2 +- .../search-with-synonyms.asciidoc | 13 ++ .../synonyms/apis/synonyms-apis.asciidoc | 17 +++ 5 files changed, 220 insertions(+), 86 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc index 3efb8f6de9b3e..e37118019a55c 100644 --- a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc @@ -85,45 +85,45 @@ Additional settings are: <> search analyzers to pick up changes to synonym files. Only to be used for search analyzers. * `expand` (defaults to `true`). -* `lenient` (defaults to `false`). If `true` ignores exceptions while parsing the synonym configuration. It is important -to note that only those synonym rules which cannot get parsed are ignored. 
For instance consider the following request: - -[source,console] --------------------------------------------------- -PUT /test_index -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "synonym": { - "tokenizer": "standard", - "filter": [ "my_stop", "synonym_graph" ] - } - }, - "filter": { - "my_stop": { - "type": "stop", - "stopwords": [ "bar" ] - }, - "synonym_graph": { - "type": "synonym_graph", - "lenient": true, - "synonyms": [ "foo, bar => baz" ] - } - } - } - } - } -} --------------------------------------------------- +Expands definitions for equivalent synonym rules. +See <>. +* `lenient` (defaults to `false`). +If `true` ignores errors while parsing the synonym configuration. +It is important to note that only those synonym rules which cannot get parsed are ignored. +See <> for an example of `lenient` behaviour for invalid synonym rules. + +[discrete] +[[synonym-graph-tokenizer-expand-equivalent-synonyms]] +===== `expand` equivalent synonym rules + +The `expand` parameter controls whether to expand equivalent synonym rules. +Consider a synonym defined like: + +`foo, bar, baz` + +Using `expand: true`, the synonym rule would be expanded into: -With the above request the word `bar` gets skipped but a mapping `foo => baz` is still added. However, if the mapping -being added was `foo, baz => bar` nothing would get added to the synonym list. This is because the target word for the -mapping is itself eliminated because it was a stop word. Similarly, if the mapping was "bar, foo, baz" and `expand` was -set to `false` no mapping would get added as when `expand=false` the target mapping is the first word. However, if -`expand=true` then the mappings added would be equivalent to `foo, baz => foo, baz` i.e, all mappings other than the -stop word. +``` +foo => foo +foo => bar +foo => baz +bar => foo +bar => bar +bar => baz +baz => foo +baz => bar +baz => baz +``` + +When `expand` is set to `false`, the synonym rule is not expanded and the first synonym is treated as the canonical representation. The synonym would be equivalent to: + +``` +foo => foo +bar => foo +baz => foo +``` + +The `expand` parameter does not affect explicit synonym rules, like `foo, bar => baz`. [discrete] [[synonym-graph-tokenizer-ignore_case-deprecated]] @@ -160,12 +160,65 @@ Text will be processed first through filters preceding the synonym filter before {es} will also use the token filters preceding the synonym filter in a tokenizer chain to parse the entries in a synonym file or synonym set. In the above example, the synonyms graph token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. -The synonym rules should not contain words that are removed by a filter that appears later in the chain (like a `stop` filter). -Removing a term from a synonym rule means there will be no matching for it at query time. - Because entries in the synonym map cannot have stacked positions, some token filters may cause issues here. Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms. For example, `asciifolding` will only produce the folded version of the token. Others, like `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error. If you need to build analyzers that include both multi-token filters and synonym filters, consider using the <> filter, with the multi-token filters in one branch and the synonym filter in the other. 
+ +[discrete] +[[synonym-graph-tokenizer-stop-token-filter]] +===== Synonyms and `stop` token filters + +Synonyms and <> interact with each other in the following ways: + +[discrete] +====== Stop token filter *before* synonym token filter + +Stop words will be removed from the synonym rule definition. +This can cause errors on the synonym rule. + +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) +==== + +For *explicit synonym rules* like `foo, bar => baz` with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the left hand side of the synonym rule. +- If `lenient` is set to `true`, the rule `foo => baz` will be added and `bar => baz` will be ignored. + +If the stop filter removed `baz` instead: + +- If `lenient` is set to `false`, an error will be raised as `baz` would be removed from the right hand side of the synonym rule. +- If `lenient` is set to `true`, the synonym will have no effect as the target word is removed. + +For *equivalent synonym rules* like `foo, bar, baz` and `expand: true`, with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the synonym rule. +- If `lenient` is set to `true`, the synonyms added would be equivalent to the following synonym rules, which do not contain the removed word: + +``` +foo => foo +foo => baz +baz => foo +baz => baz +``` + +[discrete] +====== Stop token filter *after* synonym token filter + +The stop filter will remove the terms from the resulting synonym expansion. + +For example, a synonym rule like `foo, bar => baz` and a stop filter that removes `baz` will get no matches for `foo` or `bar`, as both would get expanded to `baz` which is removed by the stop filter. + +If the stop filter removed `foo` instead, then searching for `foo` would get expanded to `baz`, which is not removed by the stop filter thus potentially providing matches for `baz`. diff --git a/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc index 046cd297b5092..1658f016db60b 100644 --- a/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc @@ -73,47 +73,45 @@ Additional settings are: <> search analyzers to pick up changes to synonym files. Only to be used for search analyzers. * `expand` (defaults to `true`). -* `lenient` (defaults to `false`). If `true` ignores exceptions while parsing the synonym configuration. It is important -to note that only those synonym rules which cannot get parsed are ignored.
For instance consider the following request: - - -[source,console] --------------------------------------------------- -PUT /test_index -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "synonym": { - "tokenizer": "standard", - "filter": [ "my_stop", "synonym" ] - } - }, - "filter": { - "my_stop": { - "type": "stop", - "stopwords": [ "bar" ] - }, - "synonym": { - "type": "synonym", - "lenient": true, - "synonyms": [ "foo, bar => baz" ] - } - } - } - } - } -} --------------------------------------------------- +Expands definitions for equivalent synonym rules. +See <>. +* `lenient` (defaults to `false`). +If `true` ignores errors while parsing the synonym configuration. +It is important to note that only those synonym rules which cannot get parsed are ignored. +See <> for an example of `lenient` behaviour for invalid synonym rules. + +[discrete] +[[synonym-tokenizer-expand-equivalent-synonyms]] +===== `expand` equivalent synonym rules + +The `expand` parameter controls whether to expand equivalent synonym rules. +Consider a synonym defined like: + +`foo, bar, baz` + +Using `expand: true`, the synonym rule would be expanded into: -With the above request the word `bar` gets skipped but a mapping `foo => baz` is still added. However, if the mapping -being added was `foo, baz => bar` nothing would get added to the synonym list. This is because the target word for the -mapping is itself eliminated because it was a stop word. Similarly, if the mapping was "bar, foo, baz" and `expand` was -set to `false` no mapping would get added as when `expand=false` the target mapping is the first word. However, if -`expand=true` then the mappings added would be equivalent to `foo, baz => foo, baz` i.e, all mappings other than the -stop word. +``` +foo => foo +foo => bar +foo => baz +bar => foo +bar => bar +bar => baz +baz => foo +baz => bar +baz => baz +``` +When `expand` is set to `false`, the synonym rule is not expanded and the first synonym is treated as the canonical representation. The synonym would be equivalent to: + +``` +foo => foo +bar => foo +baz => foo +``` + +The `expand` parameter does not affect explicit synonym rules, like `foo, bar => baz`. [discrete] [[synonym-tokenizer-ignore_case-deprecated]] @@ -135,7 +133,7 @@ To apply synonyms, you will need to include a synonym token filters into an anal "my_analyzer": { "type": "custom", "tokenizer": "standard", - "filter": ["stemmer", "synonym_graph"] + "filter": ["stemmer", "synonym"] } } ---- @@ -148,10 +146,7 @@ Order is important for your token filters. Text will be processed first through filters preceding the synonym filter before being processed by the synonym filter. {es} will also use the token filters preceding the synonym filter in a tokenizer chain to parse the entries in a synonym file or synonym set. -In the above example, the synonyms graph token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. - -The synonym rules should not contain words that are removed by a filter that appears later in the chain (like a `stop` filter). -Removing a term from a synonym rule means there will be no matching for it at query time. +In the above example, the synonyms token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. Because entries in the synonym map cannot have stacked positions, some token filters may cause issues here. Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms. 
@@ -159,3 +154,59 @@ For example, `asciifolding` will only produce the folded version of the token. Others, like `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error. If you need to build analyzers that include both multi-token filters and synonym filters, consider using the <> filter, with the multi-token filters in one branch and the synonym filter in the other. + +[discrete] +[[synonym-tokenizer-stop-token-filter]] +===== Synonyms and `stop` token filters + +Synonyms and <> interact with each other in the following ways: + +[discrete] +====== Stop token filter *before* synonym token filter + +Stop words will be removed from the synonym rule definition. +This can cause errors on the synonym rule. + +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) +==== + +For *explicit synonym rules* like `foo, bar => baz` with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the left hand side of the synonym rule. +- If `lenient` is set to `true`, the rule `foo => baz` will be added and `bar => baz` will be ignored. + +If the stop filter removed `baz` instead: + +- If `lenient` is set to `false`, an error will be raised as `baz` would be removed from the right hand side of the synonym rule. +- If `lenient` is set to `true`, the synonym will have no effect as the target word is removed. + +For *equivalent synonym rules* like `foo, bar, baz` and `expand: true`, with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the synonym rule. +- If `lenient` is set to `true`, the synonyms added would be equivalent to the following synonym rules, which do not contain the removed word: + +``` +foo => foo +foo => baz +baz => foo +baz => baz +``` + +[discrete] +====== Stop token filter *after* synonym token filter + +The stop filter will remove the terms from the resulting synonym expansion. + +For example, a synonym rule like `foo, bar => baz` and a stop filter that removes `baz` will get no matches for `foo` or `bar`, as both would get expanded to `baz` which is removed by the stop filter. + +If the stop filter removed `foo` instead, then searching for `foo` would get expanded to `baz`, which is not removed by the stop filter thus potentially providing matches for `baz`. diff --git a/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc b/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc index 63dd72dade8d0..e780c24963312 100644 --- a/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc @@ -15,7 +15,7 @@ This format uses two different definitions: ipod, i-pod, i pod computer, pc, laptop ---- -* Explicit mappings: Matches a group of words to other words. Words on the left hand side of the rule definition are expanded into all the possibilities described on the right hand side. Example: +* Explicit synonyms: Matches a group of words to other words.
Words on the left hand side of the rule definition are expanded into all the possibilities described on the right hand side. Example: + [source,synonyms] ---- diff --git a/docs/reference/search/search-your-data/search-with-synonyms.asciidoc b/docs/reference/search/search-your-data/search-with-synonyms.asciidoc index 596af695b7910..61d3a1d8f925b 100644 --- a/docs/reference/search/search-your-data/search-with-synonyms.asciidoc +++ b/docs/reference/search/search-your-data/search-with-synonyms.asciidoc @@ -82,6 +82,19 @@ If an index is created referencing a nonexistent synonyms set, the index will re The only way to recover from this scenario is to ensure the synonyms set exists then either delete and re-create the index, or close and re-open the index. ====== +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) +==== + {es} uses synonyms as part of the <>. You can use two types of <> to include synonyms: diff --git a/docs/reference/synonyms/apis/synonyms-apis.asciidoc b/docs/reference/synonyms/apis/synonyms-apis.asciidoc index c9de52939b2fe..dbbc26c36d3df 100644 --- a/docs/reference/synonyms/apis/synonyms-apis.asciidoc +++ b/docs/reference/synonyms/apis/synonyms-apis.asciidoc @@ -21,6 +21,23 @@ These filters are applied as part of the <> process by the << NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set. If you need to manage more synonym rules, you can create multiple synonyms sets. +WARNING: Synonyms sets must exist before they can be added to indices. +If an index is created referencing a nonexistent synonyms set, the index will remain in a partially created and inoperable state. +The only way to recover from this scenario is to ensure the synonyms set exists then either delete and re-create the index, or close and re-open the index. + +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) +==== + [discrete] [[synonyms-sets-apis]] === Synonyms sets APIs From 67493711f2c0560dc8366cd620e4250e5a083a55 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 18 Jul 2024 10:37:23 +0200 Subject: [PATCH 41/65] Unmute EsqlNodeSubclassTests (#110985) The test failure was already fixed in https://github.com/elastic/elasticsearch/pull/110968. 
--- muted-tests.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 320fc00c5d943..d91c462ac22c5 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -102,12 +102,6 @@ tests: method: "testNotMatchSome {p0=StandardSetup[fieldType=keyword, multivaluedField=true, empty=true, count=100]}" - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT issue: https://github.com/elastic/elasticsearch/issues/110978 -- class: org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests - method: testInfoParameters {class org.elasticsearch.xpack.esql.plan.physical.MvExpandExec} - issue: https://github.com/elastic/elasticsearch/issues/110980 -- class: org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests - method: testInfoParameters {class org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec} - issue: https://github.com/elastic/elasticsearch/issues/110981 - class: org.elasticsearch.search.sort.FieldSortIT method: testIssue6614 issue: https://github.com/elastic/elasticsearch/issues/110999 From fe1b7f1091f8ddac00a610d71e2a200e17c3a56d Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 18 Jul 2024 09:54:52 +0100 Subject: [PATCH 42/65] Combine per-repo results in get-snapshots action (#111004) With #107191 we can now safely accumulate results from all targetted repositories as they're built, rather than staging each repository's results in intermediate lists in case of failure. --- .../get/TransportGetSnapshotsAction.java | 30 ++++--------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java index 384a004861776..c4f3f3cddf2ec 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java @@ -53,14 +53,12 @@ import org.elasticsearch.transport.TransportService; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Queue; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiPredicate; @@ -182,19 +180,13 @@ private class GetSnapshotsOperation { private final GetSnapshotInfoExecutor getSnapshotInfoExecutor; // results - private final Queue> allSnapshotInfos = ConcurrentCollections.newQueue(); + private final List allSnapshotInfos = Collections.synchronizedList(new ArrayList<>()); /** * Accumulates number of snapshots that match the name/fromSortValue/slmPolicy predicates, to be returned in the response. */ private final AtomicInteger totalCount = new AtomicInteger(); - /** - * Accumulates the number of snapshots that match the name/fromSortValue/slmPolicy/after predicates, for sizing the final result - * list. 
- */ - private final AtomicInteger resultsCount = new AtomicInteger(); - GetSnapshotsOperation( CancellableTask cancellableTask, List repositories, @@ -438,18 +430,7 @@ private void loadSnapshotInfos(Iterator asyncSnapshotInfoIter if (cancellableTask.notifyIfCancelled(listener)) { return; } - final var repositoryTotalCount = new AtomicInteger(); - - final List snapshots = new ArrayList<>(); - final List syncSnapshots = Collections.synchronizedList(snapshots); - try (var listeners = new RefCountingListener(listener)) { - final var iterationCompleteListener = listeners.acquire(ignored -> { - totalCount.addAndGet(repositoryTotalCount.get()); - // no need to synchronize access to snapshots: all writes happen-before this read - resultsCount.addAndGet(snapshots.size()); - allSnapshotInfos.add(snapshots); - }); ThrottledIterator.run( Iterators.failFast(asyncSnapshotInfoIterator, () -> cancellableTask.isCancelled() || listeners.isFailing()), (ref, asyncSnapshotInfo) -> { @@ -458,9 +439,9 @@ private void loadSnapshotInfos(Iterator asyncSnapshotInfoIter @Override public void onResponse(SnapshotInfo snapshotInfo) { if (matchesPredicates(snapshotInfo)) { - repositoryTotalCount.incrementAndGet(); + totalCount.incrementAndGet(); if (afterPredicate.test(snapshotInfo)) { - syncSnapshots.add(snapshotInfo.maybeWithoutIndices(indices)); + allSnapshotInfos.add(snapshotInfo.maybeWithoutIndices(indices)); } } refListener.onResponse(null); @@ -479,7 +460,7 @@ public void onFailure(Exception e) { }, getSnapshotInfoExecutor.getMaxRunningTasks(), () -> {}, - () -> iterationCompleteListener.onResponse(null) + () -> {} ); } } @@ -489,12 +470,11 @@ private GetSnapshotsResponse buildResponse() { cancellableTask.ensureNotCancelled(); int remaining = 0; final var resultsStream = allSnapshotInfos.stream() - .flatMap(Collection::stream) .peek(this::assertSatisfiesAllPredicates) .sorted(sortBy.getSnapshotInfoComparator(order)) .skip(offset); final List snapshotInfos; - if (size == GetSnapshotsRequest.NO_LIMIT || resultsCount.get() <= size) { + if (size == GetSnapshotsRequest.NO_LIMIT || allSnapshotInfos.size() <= size) { snapshotInfos = resultsStream.toList(); } else { snapshotInfos = new ArrayList<>(size); From 55f55026570d62fb371831d274db384e9946e988 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 18 Jul 2024 10:52:17 +0100 Subject: [PATCH 43/65] Add docs on threading in get-snapshots action (#111006) The threading here is kinda subtle these days. This commit adds some more docs and assertions in this area. --- .../get/TransportGetSnapshotsAction.java | 58 +++++++++++++++---- 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java index c4f3f3cddf2ec..213c8003b7047 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java @@ -76,6 +76,36 @@ public class TransportGetSnapshotsAction extends TransportMasterNodeAction listener) } }) - .addListener(listener.map(ignored -> buildResponse()), executor, threadPool.getThreadContext()); + .addListener( + listener.map(ignored -> buildResponse()), + // If we didn't load any SnapshotInfo blobs from the repo (e.g. 
verbose=false or current-snapshots-only) then this + // listener chain will already be complete, no need to fork again. Otherwise we forked to SNAPSHOT_META so must + // fork back to MANAGEMENT for the final step. + executor, + threadPool.getThreadContext() + ); } private void maybeGetRepositoryData(String repositoryName, ActionListener listener) { @@ -326,6 +358,7 @@ private AsyncSnapshotInfo forSnapshotInProgress(SnapshotsInProgress.Entry snapsh return new AsyncSnapshotInfo() { @Override public void getSnapshotInfo(ActionListener listener) { + assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); // see [NOTE ON THREADING] final var snapshotInfo = SnapshotInfo.inProgress(snapshotInProgress); listener.onResponse(verbose ? snapshotInfo : snapshotInfo.basic()); } @@ -350,8 +383,11 @@ private AsyncSnapshotInfo forCompletedSnapshot( @Override public void getSnapshotInfo(ActionListener listener) { if (verbose) { + // always forks to SNAPSHOT_META, and may already have done so for an earlier item - see [NOTE ON THREADING] + assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT, ThreadPool.Names.SNAPSHOT_META); getSnapshotInfoExecutor.getSnapshotInfo(repository, snapshotId, listener); } else { + assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); // see [NOTE ON THREADING] ActionListener.completeWith( listener, () -> new SnapshotInfo( @@ -394,9 +430,11 @@ private Iterator getAsyncSnapshotInfoIterator(Repository repo this::forSnapshotInProgress ), repositoryData == null - // only returning in-progress snapshots + // Only returning in-progress snapshots: ? Collections.emptyIterator() - // also return matching completed snapshots (except any ones that were also found to be in-progress) + // Also return matching completed snapshots (except any ones that were also found to be in-progress). + // NB this will fork tasks to SNAPSHOT_META (if verbose=true) which will be used for subsequent items so we mustn't + // follow it with any more non-forking iteration. See [NOTE ON THREADING]. 
: Iterators.map( Iterators.filter( repositoryData.getSnapshotIds().iterator(), @@ -466,7 +504,7 @@ public void onFailure(Exception e) { } private GetSnapshotsResponse buildResponse() { - assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); + assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); // see [NOTE ON THREADING] cancellableTask.ensureNotCancelled(); int remaining = 0; final var resultsStream = allSnapshotInfos.stream() From 5b606b57992badb9dfa32a554025140c653b392a Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Thu, 18 Jul 2024 11:22:10 +0100 Subject: [PATCH 44/65] Update known-issues for the features upgrade bug, and increase scope to include 8.12.x (#111014) --- docs/reference/release-notes/8.12.0.asciidoc | 7 +++++++ docs/reference/release-notes/8.12.1.asciidoc | 10 ++++++++++ docs/reference/release-notes/8.12.2.asciidoc | 10 ++++++++++ docs/reference/release-notes/8.13.0.asciidoc | 6 +++--- docs/reference/release-notes/8.13.1.asciidoc | 6 +++--- docs/reference/release-notes/8.13.2.asciidoc | 6 +++--- docs/reference/release-notes/8.13.3.asciidoc | 6 +++--- docs/reference/release-notes/8.13.4.asciidoc | 6 +++--- docs/reference/release-notes/8.14.0.asciidoc | 6 +++--- docs/reference/release-notes/8.14.1.asciidoc | 6 +++--- docs/reference/release-notes/8.14.2.asciidoc | 6 +++--- docs/reference/release-notes/8.14.3.asciidoc | 10 ++++++++++ 12 files changed, 61 insertions(+), 24 deletions(-) diff --git a/docs/reference/release-notes/8.12.0.asciidoc b/docs/reference/release-notes/8.12.0.asciidoc index 4c0fc50584b9f..bfa99401f41a2 100644 --- a/docs/reference/release-notes/8.12.0.asciidoc +++ b/docs/reference/release-notes/8.12.0.asciidoc @@ -14,6 +14,13 @@ there are deleted documents in the segments, quantiles may fail to build and pre This issue is fixed in 8.12.1. +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[breaking-8.12.0]] [float] === Breaking changes diff --git a/docs/reference/release-notes/8.12.1.asciidoc b/docs/reference/release-notes/8.12.1.asciidoc index 9aa9a11b3bf02..8ebe5cbac3852 100644 --- a/docs/reference/release-notes/8.12.1.asciidoc +++ b/docs/reference/release-notes/8.12.1.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.12.1]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. 
+ [[bug-8.12.1]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.12.2.asciidoc b/docs/reference/release-notes/8.12.2.asciidoc index 2be8449b6c1df..44202ee8226eb 100644 --- a/docs/reference/release-notes/8.12.2.asciidoc +++ b/docs/reference/release-notes/8.12.2.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.12.2]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.12.2]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.13.0.asciidoc b/docs/reference/release-notes/8.13.0.asciidoc index 197a417e0eff4..75e2341f33766 100644 --- a/docs/reference/release-notes/8.13.0.asciidoc +++ b/docs/reference/release-notes/8.13.0.asciidoc @@ -21,12 +21,12 @@ This affects clusters running version 8.10 or later, with an active downsampling https://www.elastic.co/guide/en/elasticsearch/reference/current/downsampling-ilm.html[configuration] or a configuration that was activated at some point since upgrading to version 8.10 or later. -* When upgrading clusters from version 8.12.2 or earlier, if your cluster contains non-master-eligible nodes, +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, information about the new functionality of these upgraded nodes may not be registered properly with the master node. -This can lead to some new functionality added since 8.13.0 not being accessible on the upgraded cluster. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes -are upgraded. +are upgraded. This issue is fixed in 8.15.0. * The `pytorch_inference` process used to run Machine Learning models can consume large amounts of memory. In environments where the available memory is limited, the OS Out of Memory Killer will kill the `pytorch_inference` diff --git a/docs/reference/release-notes/8.13.1.asciidoc b/docs/reference/release-notes/8.13.1.asciidoc index f176c124e5e3b..c654af3dd5cc0 100644 --- a/docs/reference/release-notes/8.13.1.asciidoc +++ b/docs/reference/release-notes/8.13.1.asciidoc @@ -6,12 +6,12 @@ Also see <>. [[known-issues-8.13.1]] [float] === Known issues -* When upgrading clusters from version 8.12.2 or earlier, if your cluster contains non-master-eligible nodes, +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, information about the new functionality of these upgraded nodes may not be registered properly with the master node. -This can lead to some new functionality added since 8.13.0 not being accessible on the upgraded cluster. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. 
If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes -are upgraded. +are upgraded. This issue is fixed in 8.15.0. * The `pytorch_inference` process used to run Machine Learning models can consume large amounts of memory. In environments where the available memory is limited, the OS Out of Memory Killer will kill the `pytorch_inference` diff --git a/docs/reference/release-notes/8.13.2.asciidoc b/docs/reference/release-notes/8.13.2.asciidoc index c4340a200e0c5..f4540343ca9ea 100644 --- a/docs/reference/release-notes/8.13.2.asciidoc +++ b/docs/reference/release-notes/8.13.2.asciidoc @@ -6,12 +6,12 @@ Also see <>. [[known-issues-8.13.2]] [float] === Known issues -* When upgrading clusters from version 8.12.2 or earlier, if your cluster contains non-master-eligible nodes, +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, information about the new functionality of these upgraded nodes may not be registered properly with the master node. -This can lead to some new functionality added since 8.13.0 not being accessible on the upgraded cluster. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes -are upgraded. +are upgraded. This issue is fixed in 8.15.0. * The `pytorch_inference` process used to run Machine Learning models can consume large amounts of memory. In environments where the available memory is limited, the OS Out of Memory Killer will kill the `pytorch_inference` diff --git a/docs/reference/release-notes/8.13.3.asciidoc b/docs/reference/release-notes/8.13.3.asciidoc index 759b879e16685..f1bb4211f4676 100644 --- a/docs/reference/release-notes/8.13.3.asciidoc +++ b/docs/reference/release-notes/8.13.3.asciidoc @@ -13,12 +13,12 @@ SQL:: [[known-issues-8.13.3]] [float] === Known issues -* When upgrading clusters from version 8.12.2 or earlier, if your cluster contains non-master-eligible nodes, +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, information about the new functionality of these upgraded nodes may not be registered properly with the master node. -This can lead to some new functionality added since 8.13.0 not being accessible on the upgraded cluster. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes -are upgraded. +are upgraded. This issue is fixed in 8.15.0. * The `pytorch_inference` process used to run Machine Learning models can consume large amounts of memory. In environments where the available memory is limited, the OS Out of Memory Killer will kill the `pytorch_inference` diff --git a/docs/reference/release-notes/8.13.4.asciidoc b/docs/reference/release-notes/8.13.4.asciidoc index d8d0b632c734a..446aae048945b 100644 --- a/docs/reference/release-notes/8.13.4.asciidoc +++ b/docs/reference/release-notes/8.13.4.asciidoc @@ -6,12 +6,12 @@ Also see <>. 
[[known-issues-8.13.4]] [float] === Known issues -* When upgrading clusters from version 8.12.2 or earlier, if your cluster contains non-master-eligible nodes, +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, information about the new functionality of these upgraded nodes may not be registered properly with the master node. -This can lead to some new functionality added since 8.13.0 not being accessible on the upgraded cluster. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes -are upgraded. +are upgraded. This issue is fixed in 8.15.0. * The `pytorch_inference` process used to run Machine Learning models can consume large amounts of memory. In environments where the available memory is limited, the OS Out of Memory Killer will kill the `pytorch_inference` diff --git a/docs/reference/release-notes/8.14.0.asciidoc b/docs/reference/release-notes/8.14.0.asciidoc index 87b931fd05906..c2fee6ecaa07a 100644 --- a/docs/reference/release-notes/8.14.0.asciidoc +++ b/docs/reference/release-notes/8.14.0.asciidoc @@ -15,12 +15,12 @@ Security:: [[known-issues-8.14.0]] [float] === Known issues -* When upgrading clusters from version 8.12.2 or earlier, if your cluster contains non-master-eligible nodes, +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, information about the new functionality of these upgraded nodes may not be registered properly with the master node. -This can lead to some new functionality added since 8.13.0 not being accessible on the upgraded cluster. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes -are upgraded. +are upgraded. This issue is fixed in 8.15.0. * The `pytorch_inference` process used to run Machine Learning models can consume large amounts of memory. In environments where the available memory is limited, the OS Out of Memory Killer will kill the `pytorch_inference` diff --git a/docs/reference/release-notes/8.14.1.asciidoc b/docs/reference/release-notes/8.14.1.asciidoc index b35c1e651c767..de3ecd210b488 100644 --- a/docs/reference/release-notes/8.14.1.asciidoc +++ b/docs/reference/release-notes/8.14.1.asciidoc @@ -7,12 +7,12 @@ Also see <>. [[known-issues-8.14.1]] [float] === Known issues -* When upgrading clusters from version 8.12.2 or earlier, if your cluster contains non-master-eligible nodes, +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, information about the new functionality of these upgraded nodes may not be registered properly with the master node. -This can lead to some new functionality added since 8.13.0 not being accessible on the upgraded cluster. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. 
To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes -are upgraded. +are upgraded. This issue is fixed in 8.15.0. * The `pytorch_inference` process used to run Machine Learning models can consume large amounts of memory. In environments where the available memory is limited, the OS Out of Memory Killer will kill the `pytorch_inference` diff --git a/docs/reference/release-notes/8.14.2.asciidoc b/docs/reference/release-notes/8.14.2.asciidoc index 9c21cf6de466c..f3f0651508dca 100644 --- a/docs/reference/release-notes/8.14.2.asciidoc +++ b/docs/reference/release-notes/8.14.2.asciidoc @@ -6,12 +6,12 @@ Also see <>. [[known-issues-8.14.2]] [float] === Known issues -* When upgrading clusters from version 8.12.2 or earlier, if your cluster contains non-master-eligible nodes, +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, information about the new functionality of these upgraded nodes may not be registered properly with the master node. -This can lead to some new functionality added since 8.13.0 not being accessible on the upgraded cluster. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes -are upgraded. +are upgraded. This issue is fixed in 8.15.0. * The `pytorch_inference` process used to run Machine Learning models can consume large amounts of memory. In environments where the available memory is limited, the OS Out of Memory Killer will kill the `pytorch_inference` diff --git a/docs/reference/release-notes/8.14.3.asciidoc b/docs/reference/release-notes/8.14.3.asciidoc index 0d7d2d9d599c1..17c53faa4a37f 100644 --- a/docs/reference/release-notes/8.14.3.asciidoc +++ b/docs/reference/release-notes/8.14.3.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.14.3]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.14.3]] [float] === Bug fixes From ac2afd7633a42018e57c2f5c72b41f6a726c0b17 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna <93581129+salvatore-campagna@users.noreply.github.com> Date: Thu, 18 Jul 2024 12:47:51 +0200 Subject: [PATCH 45/65] Inject `host.name` field without relying on (component) templates (#110938) We do not want to rely on templates or component templates to include the host.name field in indices using LogsDB. The host.name field is a field we sort on by default when LogsDB is used. As a result, we just inject it by default, the same way we do for the @timestamp field. This prevents sorting errors due to missing host.name field in mappings. The host.name is a keyword field and depending on the value of subobjects it will be mapped as a name keyword nested inside a host or as a flat host.name keyword. 
We also include ignore_above as we normally do for keywords in observability mappings. --- .../rest-api-spec/test/logsdb/10_settings.yml | 74 ++++++++++--------- .../org/elasticsearch/index/IndexMode.java | 5 ++ .../main/resources/logs@mappings-logsdb.json | 31 -------- .../xpack/stack/StackTemplateRegistry.java | 2 +- 4 files changed, 44 insertions(+), 68 deletions(-) delete mode 100644 x-pack/plugin/core/template-resources/src/main/resources/logs@mappings-logsdb.json diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml index 4976e5e15adbe..429fdba2a0562 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml @@ -114,42 +114,44 @@ using default timestamp field mapping: message: type: text ---- -missing hostname field: - - requires: - test_runner_features: [ capabilities ] - capabilities: - - method: PUT - path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" - - - do: - catch: bad_request - indices.create: - index: test-hostname-missing - body: - settings: - index: - mode: logs - number_of_replicas: 0 - number_of_shards: 2 - mappings: - properties: - "@timestamp": - type: date - agent_id: - type: keyword - process_id: - type: integer - http_method: - type: keyword - message: - type: text - - - match: { error.root_cause.0.type: "illegal_argument_exception" } - - match: { error.type: "illegal_argument_exception" } - - match: { error.reason: "unknown index sort field:[host.name]" } +#--- +#missing hostname field: +# - requires: +# test_runner_features: [ capabilities ] +# capabilities: +# - method: PUT +# path: /{index} +# capabilities: [ logs_index_mode ] +# reason: "Support for 'logs' index mode capability required" +# +# - do: +# indices.create: +# index: test-hostname-missing +# body: +# settings: +# index: +# mode: logs +# number_of_replicas: 0 +# number_of_shards: 2 +# mappings: +# properties: +# "@timestamp": +# type: date +# agent_id: +# type: keyword +# process_id: +# type: integer +# http_method: +# type: keyword +# message: +# type: text +# +# - do: +# indices.get_settings: +# index: test-hostname-missing +# +# - is_true: test-hostname-missing +# - match: { test-hostname-missing.settings.index.mode: "logs" } --- missing sort field: diff --git a/server/src/main/java/org/elasticsearch/index/IndexMode.java b/server/src/main/java/org/elasticsearch/index/IndexMode.java index a0420fdc5e0ff..1fcaf1394d4a7 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexMode.java +++ b/server/src/main/java/org/elasticsearch/index/IndexMode.java @@ -20,6 +20,7 @@ import org.elasticsearch.index.mapper.DocumentDimensions; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.mapper.MetadataFieldMapper; @@ -349,6 +350,10 @@ protected static String tsdbMode() { .startObject(DataStreamTimestampFieldMapper.DEFAULT_PATH) .field("type", DateFieldMapper.CONTENT_TYPE) .endObject() + .startObject("host.name") + .field("type", KeywordFieldMapper.CONTENT_TYPE) + .field("ignore_above", 1024) + .endObject() .endObject() .endObject()) ); diff --git 
a/x-pack/plugin/core/template-resources/src/main/resources/logs@mappings-logsdb.json b/x-pack/plugin/core/template-resources/src/main/resources/logs@mappings-logsdb.json deleted file mode 100644 index 167efbd3ffaf5..0000000000000 --- a/x-pack/plugin/core/template-resources/src/main/resources/logs@mappings-logsdb.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "template": { - "mappings": { - "date_detection": false, - "properties": { - "@timestamp": { - "type": "date" - }, - "host.name": { - "type": "keyword" - }, - "data_stream.type": { - "type": "constant_keyword", - "value": "logs" - }, - "data_stream.dataset": { - "type": "constant_keyword" - }, - "data_stream.namespace": { - "type": "constant_keyword" - } - } - } - }, - "_meta": { - "description": "default mappings for the logs index template installed by x-pack", - "managed": true - }, - "version": ${xpack.stack.template.version}, - "deprecated": ${xpack.stack.template.deprecated} -} diff --git a/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java b/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java index 648146ccdcc61..7dc1dfb6cf3df 100644 --- a/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java +++ b/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java @@ -146,7 +146,7 @@ private Map loadComponentTemplateConfigs(boolean logs ), new IndexTemplateConfig( LOGS_MAPPINGS_COMPONENT_TEMPLATE_NAME, - logsDbEnabled ? "/logs@mappings-logsdb.json" : "/logs@mappings.json", + "/logs@mappings.json", REGISTRY_VERSION, TEMPLATE_VERSION_VARIABLE, ADDITIONAL_TEMPLATE_VARIABLES From 251c65266c53b2b59f9f923a9b5dc38f75342aa6 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 18 Jul 2024 21:26:05 +1000 Subject: [PATCH 46/65] Use enums instead of booleans in TransportReplicationAction (#111017) Relates to: #109414 --- ...tReplicationActionRetryOnClosedNodeIT.java | 4 +- ...TransportVerifyShardBeforeCloseAction.java | 4 +- .../flush/TransportShardFlushAction.java | 4 +- .../TransportVerifyShardIndexBlockAction.java | 4 +- .../refresh/TransportShardRefreshAction.java | 4 +- .../action/bulk/TransportShardBulkAction.java | 2 +- .../TransportResyncReplicationAction.java | 2 +- .../TransportReplicationAction.java | 74 ++++++++++--------- .../replication/TransportWriteAction.java | 6 +- .../seqno/GlobalCheckpointSyncAction.java | 4 +- .../RetentionLeaseBackgroundSyncAction.java | 4 +- .../index/seqno/RetentionLeaseSyncAction.java | 2 +- .../TransportReplicationActionTests.java | 24 ++++-- ...ReplicationAllPermitsAcquisitionTests.java | 4 +- .../TransportWriteActionTests.java | 4 +- .../TransportBulkShardOperationsAction.java | 2 +- 16 files changed, 90 insertions(+), 58 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/support/replication/TransportReplicationActionRetryOnClosedNodeIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/support/replication/TransportReplicationActionRetryOnClosedNodeIT.java index b89cea7dff089..c4737468a766c 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/support/replication/TransportReplicationActionRetryOnClosedNodeIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/support/replication/TransportReplicationActionRetryOnClosedNodeIT.java @@ -105,7 +105,9 @@ public TestAction( actionFilters, Request::new, Request::new, - threadPool.executor(ThreadPool.Names.GENERIC) + 
threadPool.executor(ThreadPool.Names.GENERIC), + SyncGlobalCheckpointAfterOperation.DoNotSync, + PrimaryActionExecution.RejectOnOverload ); } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportVerifyShardBeforeCloseAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportVerifyShardBeforeCloseAction.java index ac2f437f7225a..643f92ec3378f 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportVerifyShardBeforeCloseAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportVerifyShardBeforeCloseAction.java @@ -66,7 +66,9 @@ public TransportVerifyShardBeforeCloseAction( actionFilters, ShardRequest::new, ShardRequest::new, - threadPool.executor(ThreadPool.Names.MANAGEMENT) + threadPool.executor(ThreadPool.Names.MANAGEMENT), + SyncGlobalCheckpointAfterOperation.DoNotSync, + PrimaryActionExecution.RejectOnOverload ); } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportShardFlushAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportShardFlushAction.java index 74ae53f7ac9de..69e1309b89aef 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportShardFlushAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/flush/TransportShardFlushAction.java @@ -58,7 +58,9 @@ public TransportShardFlushAction( actionFilters, ShardFlushRequest::new, ShardFlushRequest::new, - threadPool.executor(ThreadPool.Names.FLUSH) + threadPool.executor(ThreadPool.Names.FLUSH), + SyncGlobalCheckpointAfterOperation.DoNotSync, + PrimaryActionExecution.RejectOnOverload ); transportService.registerRequestHandler( PRE_SYNCED_FLUSH_ACTION_NAME, diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportVerifyShardIndexBlockAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportVerifyShardIndexBlockAction.java index 31e9f959f0fe7..e93b3983ee85b 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportVerifyShardIndexBlockAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportVerifyShardIndexBlockAction.java @@ -67,7 +67,9 @@ public TransportVerifyShardIndexBlockAction( actionFilters, ShardRequest::new, ShardRequest::new, - threadPool.executor(ThreadPool.Names.MANAGEMENT) + threadPool.executor(ThreadPool.Names.MANAGEMENT), + SyncGlobalCheckpointAfterOperation.DoNotSync, + PrimaryActionExecution.RejectOnOverload ); } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java index b3e6385e7099d..cc4edcf0efb81 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportShardRefreshAction.java @@ -68,7 +68,9 @@ public TransportShardRefreshAction( actionFilters, BasicReplicationRequest::new, ShardRefreshReplicaRequest::new, - threadPool.executor(ThreadPool.Names.REFRESH) + threadPool.executor(ThreadPool.Names.REFRESH), + SyncGlobalCheckpointAfterOperation.DoNotSync, + PrimaryActionExecution.RejectOnOverload ); // registers the unpromotable version of shard refresh action new TransportUnpromotableShardRefreshAction(clusterService, transportService, 
shardStateAction, actionFilters, indicesService); diff --git a/server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java b/server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java index 67e7e3be72a02..fc9df7bbf73b9 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java @@ -115,7 +115,7 @@ public TransportShardBulkAction( BulkShardRequest::new, BulkShardRequest::new, ExecutorSelector.getWriteExecutorForShard(threadPool), - false, + PrimaryActionExecution.RejectOnOverload, indexingPressure, systemIndices ); diff --git a/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java b/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java index 4684c990299f9..5a891f33480fa 100644 --- a/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java +++ b/server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java @@ -71,7 +71,7 @@ public TransportResyncReplicationAction( ResyncReplicationRequest::new, ResyncReplicationRequest::new, ExecutorSelector.getWriteExecutorForShard(threadPool), - true, /* we should never reject resync because of thread pool capacity on primary */ + PrimaryActionExecution.Force, /* we should never reject resync because of thread pool capacity on primary */ indexingPressure, systemIndices ); diff --git a/server/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java b/server/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java index ac5b004886319..c2d7e173fd0bf 100644 --- a/server/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java +++ b/server/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java @@ -89,6 +89,34 @@ public abstract class TransportReplicationAction< ReplicaRequest extends ReplicationRequest, Response extends ReplicationResponse> extends TransportAction { + /** + * Execution of the primary action + */ + protected enum PrimaryActionExecution { + /** + * Is subject to usual queue length and indexing pressure checks + */ + RejectOnOverload, + /** + * Will be "forced" (bypassing queue length and indexing pressure checks) + */ + Force + } + + /** + * Global checkpoint behaviour + */ + protected enum SyncGlobalCheckpointAfterOperation { + /** + * Do not sync as part of this action + */ + DoNotSync, + /** + * Attempt to sync the global checkpoint to the replica(s) after success + */ + AttemptAfterSuccess + } + /** * The timeout for retrying replication requests. 
*/ @@ -128,36 +156,6 @@ public abstract class TransportReplicationAction< private volatile TimeValue initialRetryBackoffBound; private volatile TimeValue retryTimeout; - protected TransportReplicationAction( - Settings settings, - String actionName, - TransportService transportService, - ClusterService clusterService, - IndicesService indicesService, - ThreadPool threadPool, - ShardStateAction shardStateAction, - ActionFilters actionFilters, - Writeable.Reader requestReader, - Writeable.Reader replicaRequestReader, - Executor executor - ) { - this( - settings, - actionName, - transportService, - clusterService, - indicesService, - threadPool, - shardStateAction, - actionFilters, - requestReader, - replicaRequestReader, - executor, - false, - false - ); - } - @SuppressWarnings("this-escape") protected TransportReplicationAction( Settings settings, @@ -171,10 +169,12 @@ protected TransportReplicationAction( Writeable.Reader requestReader, Writeable.Reader replicaRequestReader, Executor executor, - boolean syncGlobalCheckpointAfterOperation, - boolean forceExecutionOnPrimary + SyncGlobalCheckpointAfterOperation syncGlobalCheckpointAfterOperation, + PrimaryActionExecution primaryActionExecution ) { super(actionName, actionFilters, transportService.getTaskManager()); + assert syncGlobalCheckpointAfterOperation != null : "Must specify global checkpoint sync behaviour"; + assert primaryActionExecution != null : "Must specify primary action execution behaviour"; this.threadPool = threadPool; this.transportService = transportService; this.clusterService = clusterService; @@ -187,7 +187,10 @@ protected TransportReplicationAction( this.initialRetryBackoffBound = REPLICATION_INITIAL_RETRY_BACKOFF_BOUND.get(settings); this.retryTimeout = REPLICATION_RETRY_TIMEOUT.get(settings); - this.forceExecutionOnPrimary = forceExecutionOnPrimary; + this.forceExecutionOnPrimary = switch (primaryActionExecution) { + case Force -> true; + case RejectOnOverload -> false; + }; transportService.registerRequestHandler( actionName, @@ -217,7 +220,10 @@ protected TransportReplicationAction( this.transportOptions = transportOptions(); - this.syncGlobalCheckpointAfterOperation = syncGlobalCheckpointAfterOperation; + this.syncGlobalCheckpointAfterOperation = switch (syncGlobalCheckpointAfterOperation) { + case AttemptAfterSuccess -> true; + case DoNotSync -> false; + }; ClusterSettings clusterSettings = clusterService.getClusterSettings(); clusterSettings.addSettingsUpdateConsumer(REPLICATION_INITIAL_RETRY_BACKOFF_BOUND, (v) -> initialRetryBackoffBound = v); diff --git a/server/src/main/java/org/elasticsearch/action/support/replication/TransportWriteAction.java b/server/src/main/java/org/elasticsearch/action/support/replication/TransportWriteAction.java index 8994b428adcbe..f380710cc0794 100644 --- a/server/src/main/java/org/elasticsearch/action/support/replication/TransportWriteAction.java +++ b/server/src/main/java/org/elasticsearch/action/support/replication/TransportWriteAction.java @@ -76,7 +76,7 @@ protected TransportWriteAction( Writeable.Reader request, Writeable.Reader replicaRequest, BiFunction executorFunction, - boolean forceExecutionOnPrimary, + PrimaryActionExecution primaryActionExecution, IndexingPressure indexingPressure, SystemIndices systemIndices ) { @@ -94,8 +94,8 @@ protected TransportWriteAction( request, replicaRequest, EsExecutors.DIRECT_EXECUTOR_SERVICE, - true, - forceExecutionOnPrimary + SyncGlobalCheckpointAfterOperation.AttemptAfterSuccess, + primaryActionExecution ); this.executorFunction = 
executorFunction; this.indexingPressure = indexingPressure; diff --git a/server/src/main/java/org/elasticsearch/index/seqno/GlobalCheckpointSyncAction.java b/server/src/main/java/org/elasticsearch/index/seqno/GlobalCheckpointSyncAction.java index 7d3df2c174a83..a051d9c2df430 100644 --- a/server/src/main/java/org/elasticsearch/index/seqno/GlobalCheckpointSyncAction.java +++ b/server/src/main/java/org/elasticsearch/index/seqno/GlobalCheckpointSyncAction.java @@ -63,8 +63,8 @@ public GlobalCheckpointSyncAction( Request::new, Request::new, threadPool.executor(ThreadPool.Names.WRITE), - false, - true + SyncGlobalCheckpointAfterOperation.DoNotSync, + PrimaryActionExecution.Force ); } diff --git a/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncAction.java b/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncAction.java index 541e279d4cfbb..0aa0f0b8d1556 100644 --- a/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncAction.java +++ b/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncAction.java @@ -81,7 +81,9 @@ public RetentionLeaseBackgroundSyncAction( actionFilters, Request::new, Request::new, - threadPool.executor(ThreadPool.Names.MANAGEMENT) + threadPool.executor(ThreadPool.Names.MANAGEMENT), + SyncGlobalCheckpointAfterOperation.DoNotSync, + PrimaryActionExecution.RejectOnOverload ); } diff --git a/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseSyncAction.java b/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseSyncAction.java index b5fe27fb20bc3..0efcf8ac9298b 100644 --- a/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseSyncAction.java +++ b/server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseSyncAction.java @@ -91,7 +91,7 @@ public RetentionLeaseSyncAction( RetentionLeaseSyncAction.Request::new, RetentionLeaseSyncAction.Request::new, new ManagementOnlyExecutorFunction(threadPool), - false, + PrimaryActionExecution.RejectOnOverload, indexingPressure, systemIndices ); diff --git a/server/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationActionTests.java b/server/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationActionTests.java index fcbddb581946b..04ad7d410e9b0 100644 --- a/server/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationActionTests.java @@ -144,7 +144,7 @@ public static > R resolveRequest(TransportReques private static ThreadPool threadPool; - private boolean forceExecute; + private TransportReplicationAction.PrimaryActionExecution primaryActionExecution; private ClusterService clusterService; private TransportService transportService; private CapturingTransport transport; @@ -165,7 +165,7 @@ public static void beforeClass() { @Before public void setUp() throws Exception { super.setUp(); - forceExecute = randomBoolean(); + primaryActionExecution = randomFrom(TransportReplicationAction.PrimaryActionExecution.values()); transport = new CapturingTransport(); clusterService = createClusterService(threadPool); transportService = transport.createTransportService( @@ -951,7 +951,7 @@ public void testSeqNoIsSetOnPrimary() { ActionListener argument = (ActionListener) invocation.getArguments()[0]; argument.onResponse(count::decrementAndGet); return null; - }).when(shard).acquirePrimaryOperationPermit(any(), 
any(Executor.class), eq(forceExecute)); + }).when(shard).acquirePrimaryOperationPermit(any(), any(Executor.class), eq(shouldForceAcquirePermit(primaryActionExecution))); when(shard.getActiveOperationsCount()).thenAnswer(i -> count.get()); final IndexService indexService = mock(IndexService.class); @@ -979,6 +979,13 @@ public void testSeqNoIsSetOnPrimary() { assertThat(shardRequest.getPrimaryTerm(), equalTo(primaryTerm)); } + private boolean shouldForceAcquirePermit(TransportReplicationAction.PrimaryActionExecution primaryActionExecution) { + return switch (primaryActionExecution) { + case Force -> true; + case RejectOnOverload -> false; + }; + } + public void testCounterOnPrimary() throws Exception { final String index = "test"; final ShardId shardId = new ShardId(index, "_na_", 0); @@ -1511,8 +1518,8 @@ private class TestAction extends TransportReplicationAction isPrimaryMode.get()); doAnswer(invocation -> { long term = (Long) invocation.getArguments()[0]; diff --git a/server/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationAllPermitsAcquisitionTests.java b/server/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationAllPermitsAcquisitionTests.java index d0ae26f97917a..c5642fd9681ac 100644 --- a/server/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationAllPermitsAcquisitionTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/replication/TransportReplicationAllPermitsAcquisitionTests.java @@ -456,7 +456,9 @@ private abstract class TestAction extends TransportReplicationAction()), Request::new, Request::new, - EsExecutors.DIRECT_EXECUTOR_SERVICE + EsExecutors.DIRECT_EXECUTOR_SERVICE, + SyncGlobalCheckpointAfterOperation.DoNotSync, + PrimaryActionExecution.RejectOnOverload ); this.shardId = Objects.requireNonNull(shardId); this.primary = Objects.requireNonNull(primary); diff --git a/server/src/test/java/org/elasticsearch/action/support/replication/TransportWriteActionTests.java b/server/src/test/java/org/elasticsearch/action/support/replication/TransportWriteActionTests.java index 37f02035a5f43..5cc0e55942818 100644 --- a/server/src/test/java/org/elasticsearch/action/support/replication/TransportWriteActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/replication/TransportWriteActionTests.java @@ -426,7 +426,7 @@ protected TestAction(boolean withDocumentFailureOnPrimary, boolean withDocumentF TestRequest::new, TestRequest::new, (service, ignore) -> EsExecutors.DIRECT_EXECUTOR_SERVICE, - false, + PrimaryActionExecution.RejectOnOverload, new IndexingPressure(Settings.EMPTY), EmptySystemIndices.INSTANCE ); @@ -454,7 +454,7 @@ protected TestAction( TestRequest::new, TestRequest::new, (service, ignore) -> EsExecutors.DIRECT_EXECUTOR_SERVICE, - false, + PrimaryActionExecution.RejectOnOverload, new IndexingPressure(settings), EmptySystemIndices.INSTANCE ); diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/bulk/TransportBulkShardOperationsAction.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/bulk/TransportBulkShardOperationsAction.java index d9592c3df4950..2d0c43315f746 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/bulk/TransportBulkShardOperationsAction.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/bulk/TransportBulkShardOperationsAction.java @@ -67,7 +67,7 @@ public TransportBulkShardOperationsAction( BulkShardOperationsRequest::new, 
BulkShardOperationsRequest::new, ExecutorSelector.getWriteExecutorForShard(threadPool), - false, + PrimaryActionExecution.RejectOnOverload, indexingPressure, systemIndices ); From 51d658e3cd31c6e660def6f5e33e39f737d8cfb5 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 18 Jul 2024 12:35:50 +0100 Subject: [PATCH 47/65] Always allow rebalancing by default (#111015) Today `cluster.routing.allocation.allow_rebalance` defaults to `indices_all_active` which blocks all rebalancing moves while the cluster is in `yellow` or `red` health. This was appropriate for the legacy allocator which might do too many rebalancing moves otherwise. The desired-balance allocator has better support for rebalancing a cluster that is not in `green` health, and expects to be able to rebalance some shards away from over-full nodes to avoid allocating shards to undesirable locations in the first place. This commit changes the default `allow_rebalance` setting to `always`. --- docs/changelog/111015.yaml | 15 +++++++ .../cluster/shards_allocation.asciidoc | 4 +- .../ClusterRebalanceAllocationDeciderIT.java | 39 +++++++++++++++++++ .../ClusterRebalanceAllocationDecider.java | 5 ++- .../ClusterRebalanceRoutingTests.java | 32 +++++++++------ 5 files changed, 80 insertions(+), 15 deletions(-) create mode 100644 docs/changelog/111015.yaml create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDeciderIT.java diff --git a/docs/changelog/111015.yaml b/docs/changelog/111015.yaml new file mode 100644 index 0000000000000..3cc363c8bbf6b --- /dev/null +++ b/docs/changelog/111015.yaml @@ -0,0 +1,15 @@ +pr: 111015 +summary: Always allow rebalancing by default +area: Allocation +type: enhancement +issues: [] +highlight: + title: Always allow rebalancing by default + body: |- + In earlier versions of {es} the `cluster.routing.allocation.allow_rebalance` setting defaults to + `indices_all_active` which blocks all rebalancing moves while the cluster is in `yellow` or `red` health. This was + appropriate for the legacy allocator which might do too many rebalancing moves otherwise. Today's allocator has + better support for rebalancing a cluster that is not in `green` health, and expects to be able to rebalance some + shards away from over-full nodes to avoid allocating shards to undesirable locations in the first place. From + version 8.16 `allow_rebalance` setting defaults to `always` unless the legacy allocator is explicitly enabled. + notable: true diff --git a/docs/reference/modules/cluster/shards_allocation.asciidoc b/docs/reference/modules/cluster/shards_allocation.asciidoc index 1e425c77d1264..dc53837125ee9 100644 --- a/docs/reference/modules/cluster/shards_allocation.asciidoc +++ b/docs/reference/modules/cluster/shards_allocation.asciidoc @@ -98,9 +98,9 @@ the cluster: Specify when shard rebalancing is allowed: -* `always` - Always allow rebalancing. +* `always` - (default) Always allow rebalancing. * `indices_primaries_active` - Only when all primaries in the cluster are allocated. -* `indices_all_active` - (default) Only when all shards (primaries and replicas) in the cluster are allocated. +* `indices_all_active` - Only when all shards (primaries and replicas) in the cluster are allocated. 
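Editorial note on the new default: operators who want to keep the pre-8.16 behaviour can pin the old value explicitly, since `cluster.routing.allocation.allow_rebalance` is a dynamic cluster setting (see `Property.Dynamic` in the decider below). A minimal, illustrative sketch using the cluster settings API follows; the setting name and the `indices_all_active` value come from this patch, while the request itself is only an example and not part of the change:

  PUT _cluster/settings
  {
    "persistent": {
      "cluster.routing.allocation.allow_rebalance": "indices_all_active"
    }
  }

With the default desired-balance allocator, leaving the setting unset now resolves to `always`; the `indices_all_active` default is only kept when the legacy balanced allocator is explicitly selected via `ClusterModule.SHARDS_ALLOCATOR_TYPE_SETTING`, as exercised by the `ClusterRebalanceAllocationDeciderIT` test added below.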
-- `cluster.routing.rebalance.enable`:: diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDeciderIT.java new file mode 100644 index 0000000000000..2490eade46d31 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDeciderIT.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.cluster.ClusterModule; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.test.ESIntegTestCase; + +@ESIntegTestCase.ClusterScope(numDataNodes = 0) +public class ClusterRebalanceAllocationDeciderIT extends ESIntegTestCase { + public void testDefault() { + internalCluster().startNode(); + assertEquals( + ClusterRebalanceAllocationDecider.ClusterRebalanceType.ALWAYS, + ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.get( + internalCluster().getInstance(ClusterService.class).getSettings() + ) + ); + } + + public void testDefaultLegacyAllocator() { + internalCluster().startNode( + Settings.builder().put(ClusterModule.SHARDS_ALLOCATOR_TYPE_SETTING.getKey(), ClusterModule.BALANCED_ALLOCATOR) + ); + assertEquals( + ClusterRebalanceAllocationDecider.ClusterRebalanceType.INDICES_ALL_ACTIVE, + ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.get( + internalCluster().getInstance(ClusterService.class).getSettings() + ) + ); + } +} diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java index 88d4a652a5a39..7289b218b6be4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.elasticsearch.cluster.ClusterModule; import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; @@ -44,7 +45,9 @@ public class ClusterRebalanceAllocationDecider extends AllocationDecider { private static final String CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE = "cluster.routing.allocation.allow_rebalance"; public static final Setting CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING = new Setting<>( CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, - ClusterRebalanceType.INDICES_ALL_ACTIVE.toString(), + settings -> ClusterModule.DESIRED_BALANCE_ALLOCATOR.equals(ClusterModule.SHARDS_ALLOCATOR_TYPE_SETTING.get(settings)) + ? 
ClusterRebalanceType.ALWAYS.toString() + : ClusterRebalanceType.INDICES_ALL_ACTIVE.toString(), ClusterRebalanceType::parseString, Property.Dynamic, Property.NodeScope diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ClusterRebalanceRoutingTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ClusterRebalanceRoutingTests.java index 328777bfe28e7..7f9c69955adcd 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ClusterRebalanceRoutingTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ClusterRebalanceRoutingTests.java @@ -583,20 +583,28 @@ public void testClusterAllActive3() { public void testRebalanceWithIgnoredUnassignedShards() { final AtomicBoolean allocateTest1 = new AtomicBoolean(false); - AllocationService strategy = createAllocationService(Settings.EMPTY, new TestGatewayAllocator() { - @Override - public void allocateUnassigned( - ShardRouting shardRouting, - RoutingAllocation allocation, - UnassignedAllocationHandler unassignedAllocationHandler - ) { - if (allocateTest1.get() == false && "test1".equals(shardRouting.index().getName())) { - unassignedAllocationHandler.removeAndIgnore(UnassignedInfo.AllocationStatus.NO_ATTEMPT, allocation.changes()); - } else { - super.allocateUnassigned(shardRouting, allocation, unassignedAllocationHandler); + AllocationService strategy = createAllocationService( + Settings.builder() + .put( + ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), + ClusterRebalanceAllocationDecider.ClusterRebalanceType.INDICES_ALL_ACTIVE.toString() + ) + .build(), + new TestGatewayAllocator() { + @Override + public void allocateUnassigned( + ShardRouting shardRouting, + RoutingAllocation allocation, + UnassignedAllocationHandler unassignedAllocationHandler + ) { + if (allocateTest1.get() == false && "test1".equals(shardRouting.index().getName())) { + unassignedAllocationHandler.removeAndIgnore(UnassignedInfo.AllocationStatus.NO_ATTEMPT, allocation.changes()); + } else { + super.allocateUnassigned(shardRouting, allocation, unassignedAllocationHandler); + } } } - }); + ); Metadata metadata = Metadata.builder() .put(IndexMetadata.builder("test").settings(settings(IndexVersion.current())).numberOfShards(2).numberOfReplicas(0)) From 0289ca68b8278025babf20c505bee669ca09a348 Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Thu, 18 Jul 2024 13:54:32 +0200 Subject: [PATCH 48/65] Dense vector field types updatable for int4 (#110928) --- docs/changelog/110928.yaml | 5 + .../180_update_dense_vector_type.yml | 741 +++++++++++++++++- .../vectors/DenseVectorFieldMapper.java | 28 +- .../vectors/DenseVectorFieldMapperTests.java | 545 +++++++++++++ 4 files changed, 1310 insertions(+), 9 deletions(-) create mode 100644 docs/changelog/110928.yaml diff --git a/docs/changelog/110928.yaml b/docs/changelog/110928.yaml new file mode 100644 index 0000000000000..dcb2df6e6cca9 --- /dev/null +++ b/docs/changelog/110928.yaml @@ -0,0 +1,5 @@ +pr: 110928 +summary: Dense vector field types updatable for int4 +area: Vector Search +type: enhancement +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/180_update_dense_vector_type.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/180_update_dense_vector_type.yml index 3502a5e643087..855daeaa7f163 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/180_update_dense_vector_type.yml +++ 
b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/180_update_dense_vector_type.yml @@ -2,9 +2,8 @@ setup: - requires: cluster_features: "gte_v8.15.0" reason: 'updatable dense vector field types was added in 8.15' - - skip: - reason: "contains is a newly added assertion" - features: contains + - requires: + test_runner_features: [ contains ] --- "Test create and update dense vector mapping with per-doc indexing and flush": - do: @@ -1016,6 +1015,45 @@ setup: index_options: type: int8_flat +--- +"Disallowed dense vector update path hnsw --> int4_flat": + - requires: + cluster_features: "gte_v8.16.0" + reason: 'updatable dense vector field type for int4 was added in 8.16' + - do: + indices.create: + index: test_index + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: hnsw + + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: hnsw } + + - do: + catch: /illegal_argument_exception/ + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_flat + --- "Disallowed dense vector update path int8_hnsw --> flat": - do: @@ -1088,6 +1126,67 @@ setup: index_options: type: int8_flat +--- +"Disallowed dense vector update path int4_hnsw --> int8_flat, int4_flat, flat": + - requires: + cluster_features: "gte_v8.16.0" + reason: 'updatable dense vector field type for int4 was added in 8.16' + - do: + indices.create: + index: test_index + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_hnsw + + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: int4_hnsw } + + - do: + catch: /illegal_argument_exception/ + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int8_flat + - do: + catch: /illegal_argument_exception/ + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_flat + - do: + catch: /illegal_argument_exception/ + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: flat + --- "Disallowed dense vector update path int8_flat --> flat": - do: @@ -1124,6 +1223,56 @@ setup: index_options: type: flat +--- +"Disallowed dense vector update path int4_flat --> flat, int8_flat": + - requires: + cluster_features: "gte_v8.16.0" + reason: 'updatable dense vector field type for int4 was added in 8.16' + - do: + indices.create: + index: test_index + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_flat + + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: int4_flat } + + - do: + catch: /illegal_argument_exception/ + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: flat + - 
do: + catch: /illegal_argument_exception/ + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int8_flat + --- "Allowed dense vector updates on same type but different other index_options, hnsw": - do: @@ -1320,6 +1469,103 @@ setup: ef_construction: 200 confidence_interval: 0.3 +--- +"Allowed dense vector updates on same type but different other index_options, int4_hnsw": + - requires: + cluster_features: "gte_v8.16.0" + reason: 'updatable dense vector field type for int4 was added in 8.16' + - requires: + test_runner_features: [ contains ] + - do: + indices.create: + index: test_index + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_hnsw + + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: int4_hnsw } + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_hnsw + m: 32 + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: int4_hnsw } + - match: { test_index.mappings.properties.embedding.index_options.m: 32 } + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_hnsw + m: 32 + ef_construction: 200 + + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: int4_hnsw } + - match: { test_index.mappings.properties.embedding.index_options.m: 32 } + - match: { test_index.mappings.properties.embedding.index_options.ef_construction: 200 } + + - do: + catch: /illegal_argument_exception/ # fails because m = 10 is less than the current value of 32 + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int8_hnsw + ef_construction: 200 + m: 10 + + - do: + catch: /illegal_argument_exception/ # fails because m = 16 by default, which is less than the current value of 32 + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int8_hnsw + ef_construction: 200 + --- "Allowed dense vector updates on same type but different other index_options, int8_flat": - do: @@ -1363,3 +1609,492 @@ setup: - match: { test_index.mappings.properties.embedding.type: dense_vector } - match: { test_index.mappings.properties.embedding.index_options.type: int8_flat } - match: { test_index.mappings.properties.embedding.index_options.confidence_interval: 0.3 } + +--- +"Allowed dense vector updates on same type but different other index_options, int4_flat": + - requires: + cluster_features: "gte_v8.16.0" + reason: 'updatable dense vector field type for int4 was added in 8.16' + - requires: + test_runner_features: [ contains ] + - do: + indices.create: + index: test_index + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_flat + + - do: + indices.get_mapping: + index: test_index + + - 
match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: int4_flat } + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_flat + confidence_interval: 0.3 + + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: int4_flat } + - match: { test_index.mappings.properties.embedding.index_options.confidence_interval: 0.3 } + +--- +"Test create and update dense vector mapping to int4 with per-doc indexing and flush": + - requires: + cluster_features: "gte_v8.16.0" + reason: 'updatable dense vector field type for int4 was added in 8.16' + - requires: + test_runner_features: [ contains ] + - do: + indices.create: + index: test_index + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: flat + + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: flat } + + - do: + index: + index: test_index + id: "1" + body: + embedding: [ 1, 1, 1, 1 ] + - do: + index: + index: test_index + id: "2" + body: + embedding: [ 1, 1, 1, 2 ] + - do: + index: + index: test_index + id: "3" + body: + embedding: [ 1, 1, 1, 3 ] + - do: + index: + index: test_index + id: "4" + body: + embedding: [ 1, 1, 1, 4 ] + - do: + index: + index: test_index + id: "5" + body: + embedding: [ 1, 1, 1, 5 ] + + - do: + indices.flush: { } + + - do: + index: + index: test_index + id: "6" + body: + embedding: [ 1, 1, 1, 6 ] + - do: + index: + index: test_index + id: "7" + body: + embedding: [ 1, 1, 1, 7 ] + - do: + index: + index: test_index + id: "8" + body: + embedding: [ 1, 1, 1, 8 ] + - do: + index: + index: test_index + id: "9" + body: + embedding: [ 1, 1, 1, 9 ] + - do: + index: + index: test_index + id: "10" + body: + embedding: [ 1, 1, 1, 10 ] + + - do: + indices.flush: { } + + - do: + indices.refresh: {} + + - do: + search: + index: test_index + body: + size: 3 + query: + knn: + field: embedding + query_vector: [1, 1, 1, 1] + num_candidates: 10 + + - match: { hits.total.value: 10 } + - length: {hits.hits: 3} + - contains: { hits.hits: { _id: "1" } } + - contains: { hits.hits: { _id: "2" } } + - contains: { hits.hits: { _id: "3" } } + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_flat + + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: int4_flat } + + - do: + index: + index: test_index + id: "11" + body: + embedding: [ 2, 1, 1, 1 ] + - do: + index: + index: test_index + id: "12" + body: + embedding: [ 3, 1, 1, 2 ] + - do: + index: + index: test_index + id: "13" + body: + embedding: [ 4, 1, 1, 3 ] + - do: + index: + index: test_index + id: "14" + body: + embedding: [ 5, 1, 1, 4 ] + - do: + index: + index: test_index + id: "15" + body: + embedding: [ 6, 1, 1, 5 ] + + - do: + indices.flush: { } + + - do: + index: + index: test_index + id: "16" + body: + embedding: [ 7, 1, 1, 6 ] + - do: + index: + index: test_index + id: "17" + body: + 
embedding: [ 8, 1, 1, 7 ] + - do: + index: + index: test_index + id: "18" + body: + embedding: [ 9, 1, 1, 8 ] + - do: + index: + index: test_index + id: "19" + body: + embedding: [ 10, 1, 1, 9 ] + - do: + index: + index: test_index + id: "20" + body: + embedding: [ 1, 11, 1, 10 ] + + - do: + indices.flush: { } + + - do: + indices.refresh: {} + + - do: + search: + index: test_index + body: + size: 3 + query: + knn: + field: embedding + query_vector: [ 1, 1, 1, 1 ] + num_candidates: 20 + + - match: { hits.total.value: 20 } + - length: { hits.hits: 3 } + - contains: { hits.hits: { _id: "1" } } + - contains: { hits.hits: { _id: "11" } } + - contains: { hits.hits: { _id: "2" } } + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int8_hnsw + m: 3 + + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: int8_hnsw } + + - do: + index: + index: test_index + id: "21" + body: + embedding: [ 1, 1, 2, 1 ] + - do: + index: + index: test_index + id: "22" + body: + embedding: [ 1, 1, 3, 1 ] + - do: + index: + index: test_index + id: "23" + body: + embedding: [ 1, 1, 4, 1 ] + - do: + index: + index: test_index + id: "24" + body: + embedding: [ 1, 1, 5, 1 ] + - do: + index: + index: test_index + id: "25" + body: + embedding: [ 1, 1, 6, 1 ] + + - do: + indices.flush: { } + + - do: + index: + index: test_index + id: "26" + body: + embedding: [ 1, 1, 7, 1 ] + - do: + index: + index: test_index + id: "27" + body: + embedding: [ 1, 1, 8, 1 ] + - do: + index: + index: test_index + id: "28" + body: + embedding: [ 1, 1, 9, 1 ] + - do: + index: + index: test_index + id: "29" + body: + embedding: [ 1, 1, 10, 1 ] + - do: + index: + index: test_index + id: "30" + body: + embedding: [ 1, 1, 11, 1 ] + + - do: + indices.flush: { } + + - do: + indices.refresh: {} + + - do: + search: + index: test_index + body: + size: 4 + query: + knn: + field: embedding + query_vector: [ 1, 1, 1, 1 ] + num_candidates: 30 + + - match: { hits.total.value: 30 } + - length: { hits.hits: 4 } + - contains: {hits.hits: {_id: "1"}} + - contains: {hits.hits: {_id: "11"}} + - contains: {hits.hits: {_id: "2"}} + - contains: {hits.hits: {_id: "21"}} + + - do: + indices.put_mapping: + index: test_index + body: + properties: + embedding: + type: dense_vector + dims: 4 + index_options: + type: int4_hnsw + ef_construction: 200 + + - do: + indices.get_mapping: + index: test_index + + - match: { test_index.mappings.properties.embedding.type: dense_vector } + - match: { test_index.mappings.properties.embedding.index_options.type: int4_hnsw } + + - do: + index: + index: test_index + id: "31" + body: + embedding: [ 1, 1, 1, 2 ] + - do: + index: + index: test_index + id: "32" + body: + embedding: [ 1, 1, 1, 3 ] + - do: + index: + index: test_index + id: "33" + body: + embedding: [ 1, 1, 1, 4 ] + - do: + index: + index: test_index + id: "34" + body: + embedding: [ 1, 1, 1, 5 ] + - do: + index: + index: test_index + id: "35" + body: + embedding: [ 1, 1, 1, 6 ] + + - do: + indices.flush: { } + + - do: + index: + index: test_index + id: "36" + body: + embedding: [ 1, 1, 1, 7 ] + - do: + index: + index: test_index + id: "37" + body: + embedding: [ 1, 1, 1, 8 ] + - do: + index: + index: test_index + id: "38" + body: + embedding: [ 1, 1, 1, 9 ] + - do: + index: + index: test_index + id: "39" + body: + embedding: [ 1, 1, 1, 10 ] + - 
do: + index: + index: test_index + id: "40" + body: + embedding: [ 1, 1, 1, 11 ] + + - do: + indices.flush: { } + + - do: + indices.refresh: {} + + - do: + search: + index: test_index + body: + size: 5 + query: + knn: + field: embedding + query_vector: [ 1, 1, 1, 1 ] + num_candidates: 40 + + - match: { hits.total.value: 40 } + - length: { hits.hits: 5 } + - contains: {hits.hits: {_id: "1"}} + - contains: {hits.hits: {_id: "11"}} + - contains: {hits.hits: {_id: "2"}} + - contains: {hits.hits: {_id: "21"}} + - contains: {hits.hits: {_id: "31"}} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index d27c0acdb6b2e..2c1ac0d35c898 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -1318,7 +1318,9 @@ boolean supportsElementType(ElementType elementType) { boolean updatableTo(IndexOptions update) { return update.type.equals(this.type) || update.type.equals(VectorIndexType.HNSW.name) - || update.type.equals(VectorIndexType.INT8_HNSW.name); + || update.type.equals(VectorIndexType.INT8_HNSW.name) + || update.type.equals(VectorIndexType.INT4_HNSW.name) + || update.type.equals(VectorIndexType.INT4_FLAT.name); } } @@ -1425,7 +1427,14 @@ boolean supportsElementType(ElementType elementType) { @Override boolean updatableTo(IndexOptions update) { - return Objects.equals(this, update); + boolean updatable = update.type.equals(this.type); + if (updatable) { + Int4HnswIndexOptions int4HnswIndexOptions = (Int4HnswIndexOptions) update; + // fewer connections would break assumptions on max number of connections (based on largest previous graph) during merge + // quantization could not behave as expected with different confidence intervals (and quantiles) to be created + updatable = int4HnswIndexOptions.m >= this.m && confidenceInterval == int4HnswIndexOptions.confidenceInterval; + } + return updatable; } @Override @@ -1487,7 +1496,10 @@ boolean supportsElementType(ElementType elementType) { @Override boolean updatableTo(IndexOptions update) { // TODO: add support for updating from flat, hnsw, and int8_hnsw and updating params - return Objects.equals(this, update); + return update.type.equals(this.type) + || update.type.equals(VectorIndexType.HNSW.name) + || update.type.equals(VectorIndexType.INT8_HNSW.name) + || update.type.equals(VectorIndexType.INT4_HNSW.name); } @Override @@ -1562,8 +1574,8 @@ boolean supportsElementType(ElementType elementType) { @Override boolean updatableTo(IndexOptions update) { - boolean updatable = update.type.equals(this.type); - if (updatable) { + boolean updatable; + if (update.type.equals(this.type)) { Int8HnswIndexOptions int8HnswIndexOptions = (Int8HnswIndexOptions) update; // fewer connections would break assumptions on max number of connections (based on largest previous graph) during merge // quantization could not behave as expected with different confidence intervals (and quantiles) to be created @@ -1571,6 +1583,8 @@ boolean updatableTo(IndexOptions update) { updatable &= confidenceInterval == null || int8HnswIndexOptions.confidenceInterval != null && confidenceInterval.equals(int8HnswIndexOptions.confidenceInterval); + } else { + updatable = update.type.equals(VectorIndexType.INT4_HNSW.name) && ((Int4HnswIndexOptions) update).m >= this.m; } return updatable; } @@ -1602,7 +1616,9 @@ boolean 
updatableTo(IndexOptions update) { HnswIndexOptions hnswIndexOptions = (HnswIndexOptions) update; updatable = hnswIndexOptions.m >= this.m; } - return updatable || (update.type.equals(VectorIndexType.INT8_HNSW.name) && ((Int8HnswIndexOptions) update).m >= m); + return updatable + || (update.type.equals(VectorIndexType.INT8_HNSW.name) && ((Int8HnswIndexOptions) update).m >= m) + || (update.type.equals(VectorIndexType.INT4_HNSW.name) && ((Int4HnswIndexOptions) update).m >= m); } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 3dd4e31b9ca3f..4727c65be60e9 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -195,6 +195,7 @@ protected void registerParameters(ParameterChecker checker) throws IOException { .field("element_type", "bit") ) ); + // update for flat checker.registerUpdateCheck( b -> b.field("type", "dense_vector") .field("dims", dims) @@ -210,6 +211,21 @@ protected void registerParameters(ParameterChecker checker) throws IOException { .endObject(), m -> assertTrue(m.toString().contains("\"type\":\"int8_flat\"")) ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_flat\"")) + ); checker.registerUpdateCheck( b -> b.field("type", "dense_vector") .field("dims", dims) @@ -240,6 +256,22 @@ protected void registerParameters(ParameterChecker checker) throws IOException { .endObject(), m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) + ); + // update for int8_flat checker.registerUpdateCheck( b -> b.field("type", "dense_vector") .field("dims", dims) @@ -270,6 +302,56 @@ protected void registerParameters(ParameterChecker checker) throws IOException { .endObject(), m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") 
+ .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_flat\"")) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject() + ) + ); + // update for hnsw checker.registerUpdateCheck( b -> b.field("type", "dense_vector") .field("dims", dims) @@ -285,6 +367,37 @@ protected void registerParameters(ParameterChecker checker) throws IOException { .endObject(), m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .field("m", 100) + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) + ); checker.registerConflictCheck( "index_options", fieldMapping( @@ -304,6 +417,438 @@ protected void registerParameters(ParameterChecker checker) throws IOException { .endObject() ) ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .field("m", 16) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ) + ); + // update for int8_hnsw + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .field("m", 256) + .endObject(), + m -> 
assertTrue(m.toString().contains("\"m\":256")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("m", 256) + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .field("m", 16) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject() + ) + ); + // update for int4_flat + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) + ); + 
checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject() + ) + ); + // update for int4_hnsw + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("m", 256) + .field("type", "int4_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"m\":256")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("confidence_interval", 0.03) + .field("m", 4) + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("confidence_interval", 0.03) + .field("m", 100) + .endObject(), + m -> assertTrue(m.toString().contains("\"m\":100")) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("m", 16) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("confidence_interval", 0.3) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .field("m", 16) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", 
"dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .field("m", 16) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject() + ) + ); } @Override From 2b011f1ccc58b437787f76e0082d884932982242 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 18 Jul 2024 22:06:54 +1000 Subject: [PATCH 49/65] Mute org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderIT testEnterpriseDownloaderTask #111002 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index d91c462ac22c5..869d78af20fbe 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -105,6 +105,9 @@ tests: - class: org.elasticsearch.search.sort.FieldSortIT method: testIssue6614 issue: https://github.com/elastic/elasticsearch/issues/110999 +- class: org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderIT + method: testEnterpriseDownloaderTask + issue: https://github.com/elastic/elasticsearch/issues/111002 # Examples: # From 3de980f8fc0def50254b7e437c0e5127c421e12f Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Thu, 18 Jul 2024 13:09:09 +0100 Subject: [PATCH 50/65] [DOCS] Fix rendering bug (#111025) Closes https://github.com/elastic/elasticsearch/issues/111023 --- docs/reference/search/search.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/search/search.asciidoc b/docs/reference/search/search.asciidoc index 15985088a6ff7..501d645665a02 100644 --- a/docs/reference/search/search.asciidoc +++ b/docs/reference/search/search.asciidoc @@ -141,7 +141,7 @@ When unspecified, the pre-filter phase is executed if any of these conditions is - The primary sort of the query targets an indexed field. [[search-preference]] -tag::search-preference[] +// tag::search-preference[] `preference`:: (Optional, string) Nodes and shards used for the search. By default, {es} selects from eligible @@ -178,7 +178,7 @@ Any string that does not start with `_`. If the cluster state and selected shards do not change, searches using the same `` value are routed to the same shards in the same order. 
==== -end::search-preference[] +// end::search-preference[] [[search-api-query-params-q]] From fae782ad3dc0569f4306883524c278e5c6fb1716 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 18 Jul 2024 22:32:17 +1000 Subject: [PATCH 51/65] Mute org.elasticsearch.datastreams.DataStreamsClientYamlTestSuiteIT test {p0=data_stream/190_failure_store_redirection/Ensure failure is redirected to correct failure store after a reroute processor} #111041 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 869d78af20fbe..77b34355c913f 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -108,6 +108,9 @@ tests: - class: org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderIT method: testEnterpriseDownloaderTask issue: https://github.com/elastic/elasticsearch/issues/111002 +- class: org.elasticsearch.datastreams.DataStreamsClientYamlTestSuiteIT + method: test {p0=data_stream/190_failure_store_redirection/Ensure failure is redirected to correct failure store after a reroute processor} + issue: https://github.com/elastic/elasticsearch/issues/111041 # Examples: # From 26847ef228ef7b3e2211cc13234c818d20b6604a Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 18 Jul 2024 14:32:24 +0200 Subject: [PATCH 52/65] Fix Comparator returned by MinMax (#111029) The `right == null` case was obviously wrong here for sorting nulls at either end. closes #110991 --- .../main/java/org/elasticsearch/search/sort/MinAndMax.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/sort/MinAndMax.java b/server/src/main/java/org/elasticsearch/search/sort/MinAndMax.java index 88c9f766d536c..c512b6695befb 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/MinAndMax.java +++ b/server/src/main/java/org/elasticsearch/search/sort/MinAndMax.java @@ -60,7 +60,7 @@ public T getMax() { if (left == null) { return right == null ? 0 : -1; // nulls last } - return right == null ? -1 : left.getMin().compareTo(right.getMin()); + return right == null ? 1 : left.getMin().compareTo(right.getMin()); }; @SuppressWarnings({ "unchecked", "rawtypes" }) @@ -68,7 +68,7 @@ public T getMax() { if (left == null) { return right == null ? 0 : 1; // nulls first } - return right == null ? 1 : right.getMax().compareTo(left.getMax()); + return right == null ? 
-1 : right.getMax().compareTo(left.getMax()); }; /** From 43cdda4193eb6687cddb2877fb6daecc058e771e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 18 Jul 2024 22:34:36 +1000 Subject: [PATCH 53/65] Mute org.elasticsearch.xpack.esql.querydsl.query.SingleValueQueryTests org.elasticsearch.xpack.esql.querydsl.query.SingleValueQueryTests #111042 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 77b34355c913f..3a4363c2e96ca 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -111,6 +111,8 @@ tests: - class: org.elasticsearch.datastreams.DataStreamsClientYamlTestSuiteIT method: test {p0=data_stream/190_failure_store_redirection/Ensure failure is redirected to correct failure store after a reroute processor} issue: https://github.com/elastic/elasticsearch/issues/111041 +- class: org.elasticsearch.xpack.esql.querydsl.query.SingleValueQueryTests + issue: https://github.com/elastic/elasticsearch/issues/111042 # Examples: # From 764894fd65b1fcccc055a174723fcb4a4f0e72ef Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 18 Jul 2024 13:39:19 +0100 Subject: [PATCH 54/65] Collapse nested listeners in get-snapshots action (#111028) There's no need for two layers of `RefCountingListener` any more, we propagate all failures to the top level in any case, so we can run everything under a single listener. --- .../get/TransportGetSnapshotsAction.java | 134 +++++++++--------- 1 file changed, 64 insertions(+), 70 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java index 213c8003b7047..1a279e3488123 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java @@ -163,7 +163,7 @@ protected void masterOperation( SnapshotsInProgress.get(state), request.verbose(), request.includeIndexNames() - ).getMultipleReposSnapshotInfo(listener); + ).runOperation(listener); } /** @@ -256,36 +256,11 @@ private class GetSnapshotsOperation { } } - void getMultipleReposSnapshotInfo(ActionListener listener) { - SubscribableListener - - .newForked(repositoriesDoneListener -> { - try (var listeners = new RefCountingListener(repositoriesDoneListener)) { - for (final RepositoryMetadata repository : repositories) { - final String repoName = repository.name(); - if (skipRepository(repoName)) { - continue; - } - - if (listeners.isFailing()) { - return; - } - - SubscribableListener.newForked(l -> maybeGetRepositoryData(repoName, l)) - .andThen((repositoryListener, repositoryData) -> { - assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); - cancellableTask.ensureNotCancelled(); - ensureRequiredNamesPresent(repoName, repositoryData); - loadSnapshotInfos( - getAsyncSnapshotInfoIterator(repositoriesService.repository(repoName), repositoryData), - repositoryListener - ); - }) - .addListener(listeners.acquire()); - } - } - }) - + /** + * Run the get-snapshots operation and compute the response. + */ + void runOperation(ActionListener listener) { + SubscribableListener.newForked(this::populateResults) .addListener( listener.map(ignored -> buildResponse()), // If we didn't load any SnapshotInfo blobs from the repo (e.g. 
verbose=false or current-snapshots-only) then this @@ -296,6 +271,64 @@ void getMultipleReposSnapshotInfo(ActionListener listener) ); } + /** + * Populate the results fields ({@link #allSnapshotInfos} and {@link #totalCount}). + */ + private void populateResults(ActionListener listener) { + try (var listeners = new RefCountingListener(listener)) { + for (final RepositoryMetadata repository : repositories) { + final String repositoryName = repository.name(); + if (skipRepository(repositoryName)) { + continue; + } + + if (listeners.isFailing()) { + return; + } + + maybeGetRepositoryData(repositoryName, listeners.acquire(repositoryData -> { + assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); + cancellableTask.ensureNotCancelled(); + ensureRequiredNamesPresent(repositoryName, repositoryData); + ThrottledIterator.run( + Iterators.failFast( + getAsyncSnapshotInfoIterator(repositoriesService.repository(repositoryName), repositoryData), + () -> cancellableTask.isCancelled() || listeners.isFailing() + ), + (ref, asyncSnapshotInfo) -> ActionListener.run( + ActionListener.runBefore(listeners.acquire(), ref::close), + refListener -> asyncSnapshotInfo.getSnapshotInfo(new ActionListener<>() { + @Override + public void onResponse(SnapshotInfo snapshotInfo) { + if (matchesPredicates(snapshotInfo)) { + totalCount.incrementAndGet(); + if (afterPredicate.test(snapshotInfo)) { + allSnapshotInfos.add(snapshotInfo.maybeWithoutIndices(indices)); + } + } + refListener.onResponse(null); + } + + @Override + public void onFailure(Exception e) { + if (ignoreUnavailable) { + logger.warn(Strings.format("failed to fetch snapshot info for [%s]", asyncSnapshotInfo), e); + refListener.onResponse(null); + } else { + refListener.onFailure(e); + } + } + }) + ), + getSnapshotInfoExecutor.getMaxRunningTasks(), + () -> {}, + () -> {} + ); + })); + } + } + } + private void maybeGetRepositoryData(String repositoryName, ActionListener listener) { if (snapshotNamePredicate == SnapshotNamePredicate.MATCH_CURRENT_ONLY) { listener.onResponse(null); @@ -464,45 +497,6 @@ private Map> getIndicesLookup(RepositoryData repository return snapshotsToIndices; } - private void loadSnapshotInfos(Iterator asyncSnapshotInfoIterator, ActionListener listener) { - if (cancellableTask.notifyIfCancelled(listener)) { - return; - } - try (var listeners = new RefCountingListener(listener)) { - ThrottledIterator.run( - Iterators.failFast(asyncSnapshotInfoIterator, () -> cancellableTask.isCancelled() || listeners.isFailing()), - (ref, asyncSnapshotInfo) -> { - final var refListener = ActionListener.runBefore(listeners.acquire(), ref::close); - asyncSnapshotInfo.getSnapshotInfo(new ActionListener<>() { - @Override - public void onResponse(SnapshotInfo snapshotInfo) { - if (matchesPredicates(snapshotInfo)) { - totalCount.incrementAndGet(); - if (afterPredicate.test(snapshotInfo)) { - allSnapshotInfos.add(snapshotInfo.maybeWithoutIndices(indices)); - } - } - refListener.onResponse(null); - } - - @Override - public void onFailure(Exception e) { - if (ignoreUnavailable) { - logger.warn(Strings.format("failed to fetch snapshot info for [%s]", asyncSnapshotInfo), e); - refListener.onResponse(null); - } else { - refListener.onFailure(e); - } - } - }); - }, - getSnapshotInfoExecutor.getMaxRunningTasks(), - () -> {}, - () -> {} - ); - } - } - private GetSnapshotsResponse buildResponse() { assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); // see [NOTE ON THREADING] cancellableTask.ensureNotCancelled(); From 
7e292dbbb8b9a912c182c61bc98e2fc516bce245 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 18 Jul 2024 22:52:16 +1000 Subject: [PATCH 55/65] Mute org.elasticsearch.compute.lucene.ValueSourceReaderTypeConversionTests testLoadAllStatusAllInOnePage #111048 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 3a4363c2e96ca..3e64358a98503 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -113,6 +113,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/111041 - class: org.elasticsearch.xpack.esql.querydsl.query.SingleValueQueryTests issue: https://github.com/elastic/elasticsearch/issues/111042 +- class: org.elasticsearch.compute.lucene.ValueSourceReaderTypeConversionTests + method: testLoadAllStatusAllInOnePage + issue: https://github.com/elastic/elasticsearch/issues/111048 # Examples: # From 489100ab1e1eb3175b29bb6d9ff1821eb20f253b Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Thu, 18 Jul 2024 09:21:25 -0400 Subject: [PATCH 56/65] Clean up some of the kNN code, removing unused paths (#110972) These `float[]` functions were only used by tests. Instead we should use the `VectorData` abstraction. Additionally, this updates our internal abstractions to take advantage of our vector index type enumerations. This unifies and simplifies validation logic, etc. --- .../vectors/DenseVectorFieldMapper.java | 235 +++++++++++------- .../vectors/KnnScoreDocQueryBuilder.java | 10 - .../action/search/DfsQueryPhaseTests.java | 5 +- .../vectors/DenseVectorFieldMapperTests.java | 49 +++- .../vectors/DenseVectorFieldTypeTests.java | 14 +- .../vectors/KnnScoreDocQueryBuilderTests.java | 20 +- 6 files changed, 204 insertions(+), 129 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 2c1ac0d35c898..8ffe4b4cc4a66 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -226,14 +226,17 @@ public Builder(String name, IndexVersion indexVersionCreated) { if (v != null && dims.isConfigured() && dims.get() != null) { v.validateDimension(dims.get()); } - if (v != null && v.supportsElementType(elementType.getValue()) == false) { - throw new IllegalArgumentException( - "[element_type] cannot be [" + elementType.getValue().toString() + "] when using index type [" + v.type + "]" - ); + if (v != null) { + v.validateElementType(elementType.getValue()); } }) .acceptsNull() - .setMergeValidator((previous, current, c) -> previous == null || current == null || previous.updatableTo(current)); + .setMergeValidator( + (previous, current, c) -> previous == null + || current == null + || Objects.equals(previous, current) + || previous.updatableTo(current) + ); if (defaultInt8Hnsw) { this.indexOptions.alwaysSerialize(); } @@ -1146,22 +1149,50 @@ public final String toString() { } abstract static class IndexOptions implements ToXContent { - final String type; + final VectorIndexType type; - IndexOptions(String type) { + IndexOptions(VectorIndexType type) { this.type = type; } abstract KnnVectorsFormat getVectorsFormat(ElementType elementType); - boolean supportsElementType(ElementType elementType) { - return true; + final void validateElementType(ElementType elementType) { + if 
(type.supportsElementType(elementType) == false) { + throw new IllegalArgumentException( + "[element_type] cannot be [" + elementType.toString() + "] when using index type [" + type + "]" + ); + } } abstract boolean updatableTo(IndexOptions update); - void validateDimension(int dim) { - // no-op + public final void validateDimension(int dim) { + if (type.supportsDimension(dim)) { + return; + } + throw new IllegalArgumentException(type.name + " only supports even dimensions; provided=" + dim); + } + + abstract boolean doEquals(IndexOptions other); + + abstract int doHashCode(); + + @Override + public final boolean equals(Object other) { + if (other == this) { + return true; + } + if (other == null || other.getClass() != getClass()) { + return false; + } + IndexOptions otherOptions = (IndexOptions) other; + return Objects.equals(type, otherOptions.type) && doEquals(otherOptions); + } + + @Override + public final int hashCode() { + return Objects.hash(type, doHashCode()); } } @@ -1182,6 +1213,16 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new HnswIndexOptions(m, efConstruction); } + + @Override + public boolean supportsElementType(ElementType elementType) { + return true; + } + + @Override + public boolean supportsDimension(int dims) { + return true; + } }, INT8_HNSW("int8_hnsw") { @Override @@ -1204,6 +1245,16 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new Int8HnswIndexOptions(m, efConstruction, confidenceInterval); } + + @Override + public boolean supportsElementType(ElementType elementType) { + return elementType == ElementType.FLOAT; + } + + @Override + public boolean supportsDimension(int dims) { + return true; + } }, INT4_HNSW("int4_hnsw") { public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap) { @@ -1225,6 +1276,16 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new Int4HnswIndexOptions(m, efConstruction, confidenceInterval); } + + @Override + public boolean supportsElementType(ElementType elementType) { + return elementType == ElementType.FLOAT; + } + + @Override + public boolean supportsDimension(int dims) { + return dims % 2 == 0; + } }, FLAT("flat") { @Override @@ -1232,6 +1293,16 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new FlatIndexOptions(); } + + @Override + public boolean supportsElementType(ElementType elementType) { + return true; + } + + @Override + public boolean supportsDimension(int dims) { + return true; + } }, INT8_FLAT("int8_flat") { @Override @@ -1244,6 +1315,16 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new Int8FlatIndexOptions(confidenceInterval); } + + @Override + public boolean supportsElementType(ElementType elementType) { + return elementType == ElementType.FLOAT; + } + + @Override + public boolean supportsDimension(int dims) { + return true; + } }, INT4_FLAT("int4_flat") { @Override @@ -1256,6 +1337,16 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new Int4FlatIndexOptions(confidenceInterval); } + + @Override + public boolean 
supportsElementType(ElementType elementType) { + return elementType == ElementType.FLOAT; + } + + @Override + public boolean supportsDimension(int dims) { + return dims % 2 == 0; + } }; static Optional fromString(String type) { @@ -1269,13 +1360,22 @@ static Optional fromString(String type) { } abstract IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap); + + public abstract boolean supportsElementType(ElementType elementType); + + public abstract boolean supportsDimension(int dims); + + @Override + public String toString() { + return name; + } } static class Int8FlatIndexOptions extends IndexOptions { private final Float confidenceInterval; Int8FlatIndexOptions(Float confidenceInterval) { - super("int8_flat"); + super(VectorIndexType.INT8_FLAT); this.confidenceInterval = confidenceInterval; } @@ -1297,37 +1397,30 @@ KnnVectorsFormat getVectorsFormat(ElementType elementType) { } @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + boolean doEquals(IndexOptions o) { Int8FlatIndexOptions that = (Int8FlatIndexOptions) o; return Objects.equals(confidenceInterval, that.confidenceInterval); } @Override - public int hashCode() { + int doHashCode() { return Objects.hash(confidenceInterval); } - @Override - boolean supportsElementType(ElementType elementType) { - return elementType == ElementType.FLOAT; - } - @Override boolean updatableTo(IndexOptions update) { return update.type.equals(this.type) - || update.type.equals(VectorIndexType.HNSW.name) - || update.type.equals(VectorIndexType.INT8_HNSW.name) - || update.type.equals(VectorIndexType.INT4_HNSW.name) - || update.type.equals(VectorIndexType.INT4_FLAT.name); + || update.type.equals(VectorIndexType.HNSW) + || update.type.equals(VectorIndexType.INT8_HNSW) + || update.type.equals(VectorIndexType.INT4_HNSW) + || update.type.equals(VectorIndexType.INT4_FLAT); } } static class FlatIndexOptions extends IndexOptions { FlatIndexOptions() { - super("flat"); + super(VectorIndexType.FLAT); } @Override @@ -1352,13 +1445,12 @@ boolean updatableTo(IndexOptions update) { } @Override - public boolean equals(Object o) { - if (this == o) return true; - return o != null && getClass() == o.getClass(); + public boolean doEquals(IndexOptions o) { + return o instanceof FlatIndexOptions; } @Override - public int hashCode() { + public int doHashCode() { return Objects.hash(type); } } @@ -1369,7 +1461,7 @@ static class Int4HnswIndexOptions extends IndexOptions { private final float confidenceInterval; Int4HnswIndexOptions(int m, int efConstruction, Float confidenceInterval) { - super("int4_hnsw"); + super(VectorIndexType.INT4_HNSW); this.m = m; this.efConstruction = efConstruction; // The default confidence interval for int4 is dynamic quantiles, this provides the best relevancy and is @@ -1395,15 +1487,13 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + public boolean doEquals(IndexOptions o) { Int4HnswIndexOptions that = (Int4HnswIndexOptions) o; return m == that.m && efConstruction == that.efConstruction && Objects.equals(confidenceInterval, that.confidenceInterval); } @Override - public int hashCode() { + public int doHashCode() { return Objects.hash(m, efConstruction, confidenceInterval); } @@ -1420,11 +1510,6 @@ public String toString() { + "}"; } - @Override - boolean 
supportsElementType(ElementType elementType) { - return elementType == ElementType.FLOAT; - } - @Override boolean updatableTo(IndexOptions update) { boolean updatable = update.type.equals(this.type); @@ -1436,20 +1521,13 @@ boolean updatableTo(IndexOptions update) { } return updatable; } - - @Override - void validateDimension(int dim) { - if (dim % 2 != 0) { - throw new IllegalArgumentException("int4_hnsw only supports even dimensions; provided=" + dim); - } - } } static class Int4FlatIndexOptions extends IndexOptions { private final float confidenceInterval; Int4FlatIndexOptions(Float confidenceInterval) { - super("int4_flat"); + super(VectorIndexType.INT4_FLAT); // The default confidence interval for int4 is dynamic quantiles, this provides the best relevancy and is // effectively required for int4 to behave well across a wide range of data. this.confidenceInterval = confidenceInterval == null ? 0f : confidenceInterval; @@ -1471,7 +1549,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - public boolean equals(Object o) { + public boolean doEquals(IndexOptions o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Int4FlatIndexOptions that = (Int4FlatIndexOptions) o; @@ -1479,7 +1557,7 @@ public boolean equals(Object o) { } @Override - public int hashCode() { + public int doHashCode() { return Objects.hash(confidenceInterval); } @@ -1488,26 +1566,15 @@ public String toString() { return "{type=" + type + ", confidence_interval=" + confidenceInterval + "}"; } - @Override - boolean supportsElementType(ElementType elementType) { - return elementType == ElementType.FLOAT; - } - @Override boolean updatableTo(IndexOptions update) { // TODO: add support for updating from flat, hnsw, and int8_hnsw and updating params return update.type.equals(this.type) - || update.type.equals(VectorIndexType.HNSW.name) - || update.type.equals(VectorIndexType.INT8_HNSW.name) - || update.type.equals(VectorIndexType.INT4_HNSW.name); + || update.type.equals(VectorIndexType.HNSW) + || update.type.equals(VectorIndexType.INT8_HNSW) + || update.type.equals(VectorIndexType.INT4_HNSW); } - @Override - void validateDimension(int dim) { - if (dim % 2 != 0) { - throw new IllegalArgumentException("int4_flat only supports even dimensions; provided=" + dim); - } - } } static class Int8HnswIndexOptions extends IndexOptions { @@ -1516,7 +1583,7 @@ static class Int8HnswIndexOptions extends IndexOptions { private final Float confidenceInterval; Int8HnswIndexOptions(int m, int efConstruction, Float confidenceInterval) { - super("int8_hnsw"); + super(VectorIndexType.INT8_HNSW); this.m = m; this.efConstruction = efConstruction; this.confidenceInterval = confidenceInterval; @@ -1542,7 +1609,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - public boolean equals(Object o) { + public boolean doEquals(IndexOptions o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Int8HnswIndexOptions that = (Int8HnswIndexOptions) o; @@ -1550,7 +1617,7 @@ public boolean equals(Object o) { } @Override - public int hashCode() { + public int doHashCode() { return Objects.hash(m, efConstruction, confidenceInterval); } @@ -1567,11 +1634,6 @@ public String toString() { + "}"; } - @Override - boolean supportsElementType(ElementType elementType) { - return elementType == ElementType.FLOAT; - } - @Override boolean updatableTo(IndexOptions update) { boolean updatable; @@ -1584,7 +1646,7 @@ 
boolean updatableTo(IndexOptions update) { || int8HnswIndexOptions.confidenceInterval != null && confidenceInterval.equals(int8HnswIndexOptions.confidenceInterval); } else { - updatable = update.type.equals(VectorIndexType.INT4_HNSW.name) && ((Int4HnswIndexOptions) update).m >= this.m; + updatable = update.type.equals(VectorIndexType.INT4_HNSW) && ((Int4HnswIndexOptions) update).m >= this.m; } return updatable; } @@ -1595,7 +1657,7 @@ static class HnswIndexOptions extends IndexOptions { private final int efConstruction; HnswIndexOptions(int m, int efConstruction) { - super("hnsw"); + super(VectorIndexType.HNSW); this.m = m; this.efConstruction = efConstruction; } @@ -1617,8 +1679,8 @@ boolean updatableTo(IndexOptions update) { updatable = hnswIndexOptions.m >= this.m; } return updatable - || (update.type.equals(VectorIndexType.INT8_HNSW.name) && ((Int8HnswIndexOptions) update).m >= m) - || (update.type.equals(VectorIndexType.INT4_HNSW.name) && ((Int4HnswIndexOptions) update).m >= m); + || (update.type.equals(VectorIndexType.INT8_HNSW) && ((Int8HnswIndexOptions) update).m >= m) + || (update.type.equals(VectorIndexType.INT4_HNSW) && ((Int4HnswIndexOptions) update).m >= m); } @Override @@ -1632,7 +1694,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - public boolean equals(Object o) { + public boolean doEquals(IndexOptions o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; HnswIndexOptions that = (HnswIndexOptions) o; @@ -1640,8 +1702,8 @@ public boolean equals(Object o) { } @Override - public int hashCode() { - return Objects.hash(type, m, efConstruction); + public int doHashCode() { + return Objects.hash(m, efConstruction); } @Override @@ -1773,17 +1835,6 @@ && isNotUnitVector(squaredMagnitude)) { return new DenseVectorQuery.Floats(queryVector, name()); } - Query createKnnQuery( - float[] queryVector, - Integer k, - int numCands, - Query filter, - Float similarityThreshold, - BitSetProducer parentFilter - ) { - return createKnnQuery(VectorData.fromFloats(queryVector), k, numCands, filter, similarityThreshold, parentFilter); - } - public Query createKnnQuery( VectorData queryVector, Integer k, @@ -1900,10 +1951,6 @@ int getVectorDimensions() { ElementType getElementType() { return elementType; } - - IndexOptions getIndexOptions() { - return indexOptions; - } } private final IndexOptions indexOptions; diff --git a/server/src/main/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilder.java index 65f8c60297ad8..4be1d7a613a76 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilder.java @@ -38,16 +38,6 @@ public class KnnScoreDocQueryBuilder extends AbstractQueryBuilder denseVectorFieldType.createKnnQuery(new float[] { 128, 0, 0 }, 3, 3, null, null, null) + () -> denseVectorFieldType.createKnnQuery(VectorData.fromFloats(new float[] { 128, 0, 0 }), 3, 3, null, null, null) ); assertThat( e.getMessage(), @@ -1695,7 +1696,7 @@ public void testByteVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 0.0f, 0f, -129.0f }, 3, 3, null, null, null) + () -> denseVectorFieldType.createKnnQuery(VectorData.fromFloats(new float[] { 0.0f, 0f, -129.0f }), 3, 3, null, null, null) ); assertThat( e.getMessage(), @@ 
-1704,7 +1705,7 @@ public void testByteVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 0.0f, 0.5f, 0.0f }, 3, 3, null, null, null) + () -> denseVectorFieldType.createKnnQuery(VectorData.fromFloats(new float[] { 0.0f, 0.5f, 0.0f }), 3, 3, null, null, null) ); assertThat( e.getMessage(), @@ -1713,7 +1714,7 @@ public void testByteVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 0, 0.0f, -0.25f }, 3, 3, null, null, null) + () -> denseVectorFieldType.createKnnQuery(VectorData.fromFloats(new float[] { 0, 0.0f, -0.25f }), 3, 3, null, null, null) ); assertThat( e.getMessage(), @@ -1722,13 +1723,20 @@ public void testByteVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { Float.NaN, 0f, 0.0f }, 3, 3, null, null, null) + () -> denseVectorFieldType.createKnnQuery(VectorData.fromFloats(new float[] { Float.NaN, 0f, 0.0f }), 3, 3, null, null, null) ); assertThat(e.getMessage(), containsString("element_type [byte] vectors do not support NaN values but found [NaN] at dim [0];")); e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { Float.POSITIVE_INFINITY, 0f, 0.0f }, 3, 3, null, null, null) + () -> denseVectorFieldType.createKnnQuery( + VectorData.fromFloats(new float[] { Float.POSITIVE_INFINITY, 0f, 0.0f }), + 3, + 3, + null, + null, + null + ) ); assertThat( e.getMessage(), @@ -1737,7 +1745,14 @@ public void testByteVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 0, Float.NEGATIVE_INFINITY, 0.0f }, 3, 3, null, null, null) + () -> denseVectorFieldType.createKnnQuery( + VectorData.fromFloats(new float[] { 0, Float.NEGATIVE_INFINITY, 0.0f }), + 3, + 3, + null, + null, + null + ) ); assertThat( e.getMessage(), @@ -1763,13 +1778,20 @@ public void testFloatVectorQueryBoundaries() throws IOException { Exception e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { Float.NaN, 0f, 0.0f }, 3, 3, null, null, null) + () -> denseVectorFieldType.createKnnQuery(VectorData.fromFloats(new float[] { Float.NaN, 0f, 0.0f }), 3, 3, null, null, null) ); assertThat(e.getMessage(), containsString("element_type [float] vectors do not support NaN values but found [NaN] at dim [0];")); e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { Float.POSITIVE_INFINITY, 0f, 0.0f }, 3, 3, null, null, null) + () -> denseVectorFieldType.createKnnQuery( + VectorData.fromFloats(new float[] { Float.POSITIVE_INFINITY, 0f, 0.0f }), + 3, + 3, + null, + null, + null + ) ); assertThat( e.getMessage(), @@ -1778,7 +1800,14 @@ public void testFloatVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 0, Float.NEGATIVE_INFINITY, 0.0f }, 3, 3, null, null, null) + () -> denseVectorFieldType.createKnnQuery( + VectorData.fromFloats(new float[] { 0, Float.NEGATIVE_INFINITY, 0.0f }), + 3, + 3, + null, + null, + null + ) ); assertThat( e.getMessage(), diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java 
b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java index 371a01757c935..9ee895f6de003 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java @@ -165,7 +165,7 @@ public void testCreateNestedKnnQuery() { for (int i = 0; i < dims; i++) { queryVector[i] = randomFloat(); } - Query query = field.createKnnQuery(queryVector, 10, 10, null, null, producer); + Query query = field.createKnnQuery(VectorData.fromFloats(queryVector), 10, 10, null, null, producer); assertThat(query, instanceOf(DiversifyingChildrenFloatKnnVectorQuery.class)); } { @@ -251,7 +251,7 @@ public void testFloatCreateKnnQuery() { ); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> unindexedField.createKnnQuery(new float[] { 0.3f, 0.1f, 1.0f, 0.0f }, 10, 10, null, null, null) + () -> unindexedField.createKnnQuery(VectorData.fromFloats(new float[] { 0.3f, 0.1f, 1.0f, 0.0f }), 10, 10, null, null, null) ); assertThat(e.getMessage(), containsString("to perform knn search on field [f], its mapping must have [index] set to [true]")); @@ -267,7 +267,7 @@ public void testFloatCreateKnnQuery() { ); e = expectThrows( IllegalArgumentException.class, - () -> dotProductField.createKnnQuery(new float[] { 0.3f, 0.1f, 1.0f, 0.0f }, 10, 10, null, null, null) + () -> dotProductField.createKnnQuery(VectorData.fromFloats(new float[] { 0.3f, 0.1f, 1.0f, 0.0f }), 10, 10, null, null, null) ); assertThat(e.getMessage(), containsString("The [dot_product] similarity can only be used with unit-length vectors.")); @@ -283,7 +283,7 @@ public void testFloatCreateKnnQuery() { ); e = expectThrows( IllegalArgumentException.class, - () -> cosineField.createKnnQuery(new float[] { 0.0f, 0.0f, 0.0f, 0.0f }, 10, 10, null, null, null) + () -> cosineField.createKnnQuery(VectorData.fromFloats(new float[] { 0.0f, 0.0f, 0.0f, 0.0f }), 10, 10, null, null, null) ); assertThat(e.getMessage(), containsString("The [cosine] similarity does not support vectors with zero magnitude.")); } @@ -304,7 +304,7 @@ public void testCreateKnnQueryMaxDims() { for (int i = 0; i < 4096; i++) { queryVector[i] = randomFloat(); } - Query query = fieldWith4096dims.createKnnQuery(queryVector, 10, 10, null, null, null); + Query query = fieldWith4096dims.createKnnQuery(VectorData.fromFloats(queryVector), 10, 10, null, null, null); assertThat(query, instanceOf(KnnFloatVectorQuery.class)); } @@ -342,7 +342,7 @@ public void testByteCreateKnnQuery() { ); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> unindexedField.createKnnQuery(new float[] { 0.3f, 0.1f, 1.0f }, 10, 10, null, null, null) + () -> unindexedField.createKnnQuery(VectorData.fromFloats(new float[] { 0.3f, 0.1f, 1.0f }), 10, 10, null, null, null) ); assertThat(e.getMessage(), containsString("to perform knn search on field [f], its mapping must have [index] set to [true]")); @@ -358,7 +358,7 @@ public void testByteCreateKnnQuery() { ); e = expectThrows( IllegalArgumentException.class, - () -> cosineField.createKnnQuery(new float[] { 0.0f, 0.0f, 0.0f }, 10, 10, null, null, null) + () -> cosineField.createKnnQuery(VectorData.fromFloats(new float[] { 0.0f, 0.0f, 0.0f }), 10, 10, null, null, null) ); assertThat(e.getMessage(), containsString("The [cosine] similarity does not support vectors with zero magnitude.")); diff --git 
a/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java index d2a5859ae981f..a558081c2d16f 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java @@ -56,7 +56,7 @@ protected KnnScoreDocQueryBuilder doCreateTestQueryBuilder() { return new KnnScoreDocQueryBuilder( scoreDocs.toArray(new ScoreDoc[0]), randomBoolean() ? "field" : null, - randomBoolean() ? randomVector(10) : null + randomBoolean() ? VectorData.fromFloats(randomVector(10)) : null ); } @@ -65,7 +65,7 @@ public void testValidOutput() { KnnScoreDocQueryBuilder query = new KnnScoreDocQueryBuilder( new ScoreDoc[] { new ScoreDoc(0, 4.25f), new ScoreDoc(5, 1.6f) }, "field", - new float[] { 1.0f, 2.0f } + VectorData.fromFloats(new float[] { 1.0f, 2.0f }) ); String expected = """ { @@ -155,7 +155,7 @@ public void testRewriteToMatchNone() throws IOException { KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder( new ScoreDoc[0], randomBoolean() ? "field" : null, - randomBoolean() ? randomVector(10) : null + randomBoolean() ? VectorData.fromFloats(randomVector(10)) : null ); QueryRewriteContext context = randomBoolean() ? new InnerHitsRewriteContext(createSearchExecutionContext().getParserConfig(), System::currentTimeMillis) @@ -169,7 +169,7 @@ public void testRewriteForInnerHits() throws IOException { KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder( new ScoreDoc[] { new ScoreDoc(0, 4.25f), new ScoreDoc(5, 1.6f) }, randomAlphaOfLength(10), - randomVector(10) + VectorData.fromFloats(randomVector(10)) ); queryBuilder.boost(randomFloat()); queryBuilder.queryName(randomAlphaOfLength(10)); @@ -218,7 +218,11 @@ public void testScoreDocQueryWeightCount() throws IOException { } ScoreDoc[] scoreDocs = scoreDocsList.toArray(new ScoreDoc[0]); - KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder(scoreDocs, "field", randomVector(10)); + KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder( + scoreDocs, + "field", + VectorData.fromFloats(randomVector(10)) + ); Query query = queryBuilder.doToQuery(context); final Weight w = query.createWeight(searcher, ScoreMode.TOP_SCORES, 1.0f); for (LeafReaderContext leafReaderContext : searcher.getLeafContexts()) { @@ -261,7 +265,11 @@ public void testScoreDocQuery() throws IOException { } ScoreDoc[] scoreDocs = scoreDocsList.toArray(new ScoreDoc[0]); - KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder(scoreDocs, "field", randomVector(10)); + KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder( + scoreDocs, + "field", + VectorData.fromFloats(randomVector(10)) + ); final Query query = queryBuilder.doToQuery(context); final Weight w = query.createWeight(searcher, ScoreMode.TOP_SCORES, 1.0f); From aca03a5bff2ddb8b4709fe782c18a8e2ca32873a Mon Sep 17 00:00:00 2001 From: Ioana Tagirta Date: Thu, 18 Jul 2024 15:22:26 +0200 Subject: [PATCH 57/65] [ESQL] Minor refactor for DataType (#111016) * Remove duplicate functions from EsqlDataTypes * Remove unused methods from DataType --- .../xpack/esql/core/type/DataType.java | 15 --------------- .../org/elasticsearch/xpack/esql/Column.java | 3 +-- .../xpack/esql/analysis/Verifier.java | 2 +- .../xpack/esql/enrich/EnrichLookupService.java | 3 +-- .../xpack/esql/enrich/EnrichPolicyResolver.java | 4 ++-- 
.../scalar/convert/AbstractConvertFunction.java | 3 +-- .../function/scalar/date/DateExtract.java | 3 +-- .../function/scalar/date/DateFormat.java | 3 +-- .../function/scalar/date/DateParse.java | 3 +-- .../comparison/InsensitiveEqualsMapper.java | 3 +-- .../xpack/esql/type/EsqlDataTypes.java | 16 +++------------- .../scalar/multivalue/MvConcatTests.java | 5 ++--- .../spatial/BinarySpatialFunctionTestCase.java | 2 +- .../xpack/esql/type/MultiTypeEsFieldTests.java | 2 +- 14 files changed, 17 insertions(+), 50 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index f59af4a1282cc..f7f9d238ec5ce 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java @@ -229,25 +229,10 @@ public static boolean isNullOrNumeric(DataType t) { return t.isNumeric() || isNull(t); } - public static boolean isSigned(DataType t) { - return t.isNumeric() && t.equals(UNSIGNED_LONG) == false; - } - public static boolean isDateTime(DataType type) { return type == DATETIME; } - public static boolean areCompatible(DataType left, DataType right) { - if (left == right) { - return true; - } else { - return (left == NULL || right == NULL) - || (isString(left) && isString(right)) - || (left.isNumeric() && right.isNumeric()) - || (isDateTime(left) && isDateTime(right)); - } - } - public String nameUpper() { return name; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/Column.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/Column.java index a19dafba1559b..6287bf54ce5b0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/Column.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/Column.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Releasables; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.planner.PlannerUtils; -import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.io.IOException; @@ -28,7 +27,7 @@ public record Column(DataType type, Block values) implements Releasable, Writeab } public Column(BlockStreamInput in) throws IOException { - this(EsqlDataTypes.fromTypeName(in.readString()), in.readNamedWriteable(Block.class)); + this(DataType.fromTypeName(in.readString()), in.readNamedWriteable(Block.class)); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index a4e0d99b0d3fc..4dfdb107e5bac 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -343,7 +343,7 @@ private static void checkRegexExtractOnlyOnStrings(LogicalPlan p, Set f if (p instanceof RegexExtract re) { Expression expr = re.input(); DataType type = expr.dataType(); - if (EsqlDataTypes.isString(type) == false) { + if (DataType.isString(type) == false) { failures.add( fail( expr, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java index 2425fa24b17c2..0f80fb809fa0b 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java @@ -83,7 +83,6 @@ import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; import org.elasticsearch.xpack.esql.planner.PlannerUtils; import org.elasticsearch.xpack.esql.plugin.EsqlPlugin; -import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.io.IOException; import java.util.ArrayList; @@ -467,7 +466,7 @@ private static class LookupRequest extends TransportRequest implements IndicesRe String inputDataType = (in.getTransportVersion().onOrAfter(TransportVersions.ESQL_EXTENDED_ENRICH_INPUT_TYPE)) ? in.readString() : "unknown"; - this.inputDataType = EsqlDataTypes.fromTypeName(inputDataType); + this.inputDataType = DataType.fromTypeName(inputDataType); this.matchType = in.readString(); this.matchField = in.readString(); try (BlockStreamInput bsi = new BlockStreamInput(in, blockFactory)) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java index 82eda9679074d..2d42241d77ada 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java @@ -37,11 +37,11 @@ import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.analysis.EnrichResolution; import org.elasticsearch.xpack.esql.core.index.EsIndex; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.util.StringUtils; import org.elasticsearch.xpack.esql.plan.logical.Enrich; import org.elasticsearch.xpack.esql.session.IndexResolver; -import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.io.IOException; import java.util.ArrayList; @@ -192,7 +192,7 @@ private Tuple mergeLookupResults( EsField field = m.getValue(); field = new EsField( field.getName(), - EsqlDataTypes.fromTypeName(field.getDataType().typeName()), + DataType.fromTypeName(field.getDataType().typeName()), field.getProperties(), field.isAggregatable(), field.isAlias() diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java index 0fed02f89fd92..b731a400deba3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java @@ -26,7 +26,6 @@ import org.elasticsearch.xpack.esql.expression.function.Warnings; import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.io.IOException; import java.util.ArrayList; @@ -50,7 +49,7 @@ public abstract class AbstractConvertFunction extends UnaryScalarFunction { // the numeric types convert functions need to handle; the other numeric types are converted upstream to one of these private static final List NUMERIC_TYPES = 
List.of(DataType.INTEGER, DataType.LONG, DataType.UNSIGNED_LONG, DataType.DOUBLE); - public static final List STRING_TYPES = DataType.types().stream().filter(EsqlDataTypes::isString).toList(); + public static final List STRING_TYPES = DataType.types().stream().filter(DataType::isString).toList(); protected AbstractConvertFunction(Source source, Expression field) { super(source, field); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateExtract.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateExtract.java index 5a57e98be38b9..f9dcdeb342cb5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateExtract.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateExtract.java @@ -26,7 +26,6 @@ import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlConfigurationFunction; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.io.IOException; import java.time.ZoneId; @@ -129,7 +128,7 @@ private ChronoField chronoField() { if (chronoField == null) { Expression field = children().get(0); try { - if (field.foldable() && EsqlDataTypes.isString(field.dataType())) { + if (field.foldable() && DataType.isString(field.dataType())) { chronoField = (ChronoField) STRING_TO_CHRONO_FIELD.convert(field.fold()); } } catch (Exception e) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateFormat.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateFormat.java index 84a1a6e77ea73..bfc1bbaa5101d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateFormat.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateFormat.java @@ -27,7 +27,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlConfigurationFunction; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.session.EsqlConfiguration; -import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.io.IOException; import java.util.List; @@ -146,7 +145,7 @@ public ExpressionEvaluator.Factory toEvaluator(Function new InsensitiveEqualsConstantEvaluator( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java index 8a75d3f379dd3..aad9470ecbbb7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java @@ -8,13 +8,10 @@ import org.elasticsearch.xpack.esql.core.type.DataType; -import java.util.Locale; - import static org.elasticsearch.xpack.esql.core.type.DataType.BYTE; import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; import static org.elasticsearch.xpack.esql.core.type.DataType.HALF_FLOAT; -import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.NESTED; import static 
org.elasticsearch.xpack.esql.core.type.DataType.NULL; import static org.elasticsearch.xpack.esql.core.type.DataType.OBJECT; @@ -22,7 +19,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.SCALED_FLOAT; import static org.elasticsearch.xpack.esql.core.type.DataType.SHORT; import static org.elasticsearch.xpack.esql.core.type.DataType.SOURCE; -import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; import static org.elasticsearch.xpack.esql.core.type.DataType.isNull; @@ -31,14 +27,6 @@ public final class EsqlDataTypes { private EsqlDataTypes() {} - public static DataType fromTypeName(String name) { - return DataType.fromTypeName(name.toLowerCase(Locale.ROOT)); - } - - public static boolean isString(DataType t) { - return t == KEYWORD || t == TEXT; - } - public static boolean isPrimitive(DataType t) { return t != OBJECT && t != NESTED; } @@ -98,7 +86,9 @@ public static boolean areCompatible(DataType left, DataType right) { if (left == right) { return true; } else { - return (left == NULL || right == NULL) || (isString(left) && isString(right)) || (left.isNumeric() && right.isNumeric()); + return (left == NULL || right == NULL) + || (DataType.isString(left) && DataType.isString(right)) + || (left.isNumeric() && right.isNumeric()); } } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvConcatTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvConcatTests.java index 0277093152cba..3c668b6f41d4b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvConcatTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvConcatTests.java @@ -16,7 +16,6 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; -import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.util.ArrayList; import java.util.List; @@ -33,11 +32,11 @@ public MvConcatTests(@Name("TestCase") Supplier testC public static Iterable parameters() { List suppliers = new ArrayList<>(); for (DataType fieldType : DataType.types()) { - if (EsqlDataTypes.isString(fieldType) == false) { + if (DataType.isString(fieldType) == false) { continue; } for (DataType delimType : DataType.types()) { - if (EsqlDataTypes.isString(delimType) == false) { + if (DataType.isString(delimType) == false) { continue; } for (int l = 1; l < 10; l++) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/spatial/BinarySpatialFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/spatial/BinarySpatialFunctionTestCase.java index a30cce9f765ed..4ab1517d0c17a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/spatial/BinarySpatialFunctionTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/spatial/BinarySpatialFunctionTestCase.java @@ -27,10 +27,10 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static 
org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.expression.function.scalar.spatial.SpatialRelatesFunction.compatibleTypeNames; import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isSpatial; import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isSpatialGeo; -import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isString; import static org.hamcrest.Matchers.equalTo; public abstract class BinarySpatialFunctionTestCase extends AbstractScalarFunctionTestCase { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java index bebfcd7f7bdbc..0fa8719f17744 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java @@ -42,7 +42,7 @@ import java.util.List; import java.util.Map; -import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isString; +import static org.elasticsearch.xpack.esql.core.type.DataType.isString; /** * This test was originally based on the tests for sub-classes of EsField, like InvalidMappedFieldTests. From 39aa832400b526f41e0019029e42d04efbfcdcb5 Mon Sep 17 00:00:00 2001 From: Enrico Zimuel Date: Thu, 18 Jul 2024 15:24:36 +0200 Subject: [PATCH 58/65] Changed security API endpoints to stable (#110862) --- .../api/security.create_cross_cluster_api_key.json | 2 +- .../api/security.update_cross_cluster_api_key.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/security.create_cross_cluster_api_key.json b/rest-api-spec/src/main/resources/rest-api-spec/api/security.create_cross_cluster_api_key.json index 6fd74f9eba3e3..88d6b97067492 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/security.create_cross_cluster_api_key.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/security.create_cross_cluster_api_key.json @@ -4,7 +4,7 @@ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-cross-cluster-api-key.html", "description": "Creates a cross-cluster API key for API key based remote cluster access." }, - "stability": "beta", + "stability": "stable", "visibility": "public", "headers": { "accept": [ diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/security.update_cross_cluster_api_key.json b/rest-api-spec/src/main/resources/rest-api-spec/api/security.update_cross_cluster_api_key.json index 9428089a31e80..e59d6c1efccf8 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/security.update_cross_cluster_api_key.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/security.update_cross_cluster_api_key.json @@ -4,7 +4,7 @@ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-update-cross-cluster-api-key.html", "description": "Updates attributes of an existing cross-cluster API key." 
}, - "stability": "beta", + "stability": "stable", "visibility": "public", "headers": { "accept": [ From 15ce82de66c57255ca53b3cf58f9bdaa7db3f9d9 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Thu, 18 Jul 2024 09:58:42 -0400 Subject: [PATCH 59/65] Unmuting #110999 after #111029 (#111052) this is now fixed after #111029 closes https://github.com/elastic/elasticsearch/issues/110999 --- muted-tests.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 3e64358a98503..46b63ababd756 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -102,9 +102,6 @@ tests: method: "testNotMatchSome {p0=StandardSetup[fieldType=keyword, multivaluedField=true, empty=true, count=100]}" - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT issue: https://github.com/elastic/elasticsearch/issues/110978 -- class: org.elasticsearch.search.sort.FieldSortIT - method: testIssue6614 - issue: https://github.com/elastic/elasticsearch/issues/110999 - class: org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderIT method: testEnterpriseDownloaderTask issue: https://github.com/elastic/elasticsearch/issues/111002 From ee8a0e8c000c18e6f03c19b7a1b23fee35ec0f98 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Thu, 18 Jul 2024 16:09:53 +0200 Subject: [PATCH 60/65] [ESQL] Run SingleValueMatchQuery approximation last (#111039) Currently we run SingleValueQueries as a filter composed of the original and a doc values queries that filter multivalues. It might happen that the latter drives the iteration resulting in warnings of multivalue fields even if the query does not match any. This PR proposes to increase the match cost for the doc values query so it always run last. This makes it easier to test and it should never give false warnings. 
fixes https://github.com/elastic/elasticsearch/issues/110977 --- muted-tests.yml | 5 ----- .../querydsl/query/SingleValueMatchQuery.java | 7 +++---- .../querydsl/query/SingleValueQueryTests.java | 15 ++++++--------- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 46b63ababd756..718505effd4f6 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -97,9 +97,6 @@ tests: - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT method: "test {stats.Count_or_null SYNC #2}" issue: https://github.com/elastic/elasticsearch/issues/110950 -- class: "org.elasticsearch.xpack.esql.querydsl.query.SingleValueQueryTests" - issue: "https://github.com/elastic/elasticsearch/issues/110977" - method: "testNotMatchSome {p0=StandardSetup[fieldType=keyword, multivaluedField=true, empty=true, count=100]}" - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT issue: https://github.com/elastic/elasticsearch/issues/110978 - class: org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderIT @@ -108,8 +105,6 @@ tests: - class: org.elasticsearch.datastreams.DataStreamsClientYamlTestSuiteIT method: test {p0=data_stream/190_failure_store_redirection/Ensure failure is redirected to correct failure store after a reroute processor} issue: https://github.com/elastic/elasticsearch/issues/111041 -- class: org.elasticsearch.xpack.esql.querydsl.query.SingleValueQueryTests - issue: https://github.com/elastic/elasticsearch/issues/111042 - class: org.elasticsearch.compute.lucene.ValueSourceReaderTypeConversionTests method: testLoadAllStatusAllInOnePage issue: https://github.com/elastic/elasticsearch/issues/111048 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMatchQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMatchQuery.java index ac75a58baaa06..386c983c8e6af 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMatchQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMatchQuery.java @@ -42,11 +42,10 @@ final class SingleValueMatchQuery extends Query { /** - * The estimated number of comparisons to check if a {@link DocValues} - * has more than one value. There isn't a good way to get that number out of - * {@link DocValues} so this is a guess. + * Choose a big enough value so this approximation never drives the iteration. 
+ * This avoids reporting warnings when queries are not matching multi-values */ - private static final int MULTI_VALUE_MATCH_COST = 10; + private static final int MULTI_VALUE_MATCH_COST = 1000; private static final IllegalArgumentException MULTI_VALUE_EXCEPTION = new IllegalArgumentException( "single-value function encountered multi-value" ); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java index 5d89ef3350193..2ba397a3cb3de 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java @@ -76,7 +76,7 @@ public void testMatchSome() throws IOException { int max = between(1, 100); testCase( new SingleValueQuery.Builder(new RangeQueryBuilder("i").lt(max), "foo", Source.EMPTY), - (fieldValues, count) -> runCase(fieldValues, count, null, max, false) + (fieldValues, count) -> runCase(fieldValues, count, null, max) ); } @@ -116,7 +116,7 @@ public void testNotMatchSome() throws IOException { int max = between(1, 100); testCase( new SingleValueQuery(new RangeQuery(Source.EMPTY, "i", null, false, max, false, null), "foo").negate(Source.EMPTY).asBuilder(), - (fieldValues, count) -> runCase(fieldValues, count, max, 100, true) + (fieldValues, count) -> runCase(fieldValues, count, max, 100) ); } @@ -132,10 +132,8 @@ interface TestCase { * @param count The count of the docs the query matched. * @param docsStart The start of the slice in fieldValues we want to consider. If `null`, the start will be 0. * @param docsStop The end of the slice in fieldValues we want to consider. If `null`, the end will be the fieldValues size. - * @param scanForMVs Should the check for Warnings scan the entire fieldValues? This will override the docsStart:docsStop interval, - * which is needed for some cases. */ - private void runCase(List> fieldValues, int count, Integer docsStart, Integer docsStop, boolean scanForMVs) { + private void runCase(List> fieldValues, int count, Integer docsStart, Integer docsStop) { int expected = 0; int min = docsStart != null ? docsStart : 0; int max = docsStop != null ? docsStop : fieldValues.size(); @@ -150,9 +148,8 @@ private void runCase(List> fieldValues, int count, Integer docsStar } assertThat(count, equalTo(expected)); - // the SingleValueQuery.TwoPhaseIteratorForSortedNumericsAndTwoPhaseQueries can scan all docs - and generate warnings - even if - // inner query matches none, so warn if MVs have been encountered within given range, OR if a full scan is required - if (mvCountInRange > 0 || (scanForMVs && fieldValues.stream().anyMatch(x -> x.size() > 1))) { + // we should only have warnings if we have matched a multi-value + if (mvCountInRange > 0) { assertWarnings( "Line -1:-1: evaluation of [] failed, treating result as null. 
Only first 20 failures recorded.", "Line -1:-1: java.lang.IllegalArgumentException: single-value function encountered multi-value" @@ -161,7 +158,7 @@ private void runCase(List> fieldValues, int count, Integer docsStar } private void runCase(List> fieldValues, int count) { - runCase(fieldValues, count, null, null, false); + runCase(fieldValues, count, null, null); } private void testCase(SingleValueQuery.Builder builder, TestCase testCase) throws IOException { From 50a7a0868c669a429a8bb506e2b958c2a5855e91 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Thu, 18 Jul 2024 16:12:10 +0200 Subject: [PATCH 61/65] Speed up collecting zero document string terms (#110922) Use segment ordinals when possible to collect zero document buckets --- docs/changelog/110922.yaml | 5 ++ .../terms/MapStringTermsAggregator.java | 68 +++++++++++++++---- .../bucket/terms/TermsAggregatorTests.java | 59 ++++++++++++++++ .../DocumentLevelSecurityTests.java | 4 ++ 4 files changed, 124 insertions(+), 12 deletions(-) create mode 100644 docs/changelog/110922.yaml diff --git a/docs/changelog/110922.yaml b/docs/changelog/110922.yaml new file mode 100644 index 0000000000000..6a85ce57de103 --- /dev/null +++ b/docs/changelog/110922.yaml @@ -0,0 +1,5 @@ +pr: 110922 +summary: Speed up collecting zero document string terms +area: Aggregations +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java index 9cea884667325..936fcf2edc225 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java @@ -9,7 +9,10 @@ import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.PriorityQueue; @@ -419,25 +422,66 @@ void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedD } // we need to fill-in the blanks for (LeafReaderContext ctx : searcher().getTopReaderContext().leaves()) { - SortedBinaryDocValues values = valuesSource.bytesValues(ctx); - // brute force - for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) { - if (excludeDeletedDocs && ctx.reader().getLiveDocs() != null && ctx.reader().getLiveDocs().get(docId) == false) { - continue; + final Bits liveDocs = excludeDeletedDocs ? 
ctx.reader().getLiveDocs() : null; + if (liveDocs == null && valuesSource.hasOrdinals()) { + final SortedSetDocValues values = ((ValuesSource.Bytes.WithOrdinals) valuesSource).ordinalsValues(ctx); + collectZeroDocEntries(values, owningBucketOrd); + } else { + final SortedBinaryDocValues values = valuesSource.bytesValues(ctx); + final BinaryDocValues singleton = FieldData.unwrapSingleton(values); + if (singleton != null) { + collectZeroDocEntries(singleton, liveDocs, ctx.reader().maxDoc(), owningBucketOrd); + } else { + collectZeroDocEntries(values, liveDocs, ctx.reader().maxDoc(), owningBucketOrd); } - if (values.advanceExact(docId)) { - int valueCount = values.docValueCount(); - for (int i = 0; i < valueCount; ++i) { - BytesRef term = values.nextValue(); - if (includeExclude == null || includeExclude.accept(term)) { - bucketOrds.add(owningBucketOrd, term); - } + } + } + } + + private void collectZeroDocEntries(SortedSetDocValues values, long owningBucketOrd) throws IOException { + final TermsEnum termsEnum = values.termsEnum(); + BytesRef term; + while ((term = termsEnum.next()) != null) { + if (includeExclude == null || includeExclude.accept(term)) { + bucketOrds.add(owningBucketOrd, term); + } + } + } + + private void collectZeroDocEntries(SortedBinaryDocValues values, Bits liveDocs, int maxDoc, long owningBucketOrd) + throws IOException { + // brute force + for (int docId = 0; docId < maxDoc; ++docId) { + if (liveDocs != null && liveDocs.get(docId) == false) { + continue; + } + if (values.advanceExact(docId)) { + final int valueCount = values.docValueCount(); + for (int i = 0; i < valueCount; ++i) { + final BytesRef term = values.nextValue(); + if (includeExclude == null || includeExclude.accept(term)) { + bucketOrds.add(owningBucketOrd, term); } } } } } + private void collectZeroDocEntries(BinaryDocValues values, Bits liveDocs, int maxDoc, long owningBucketOrd) throws IOException { + // brute force + for (int docId = 0; docId < maxDoc; ++docId) { + if (liveDocs != null && liveDocs.get(docId) == false) { + continue; + } + if (values.advanceExact(docId)) { + final BytesRef term = values.binaryValue(); + if (includeExclude == null || includeExclude.accept(term)) { + bucketOrds.add(owningBucketOrd, term); + } + } + } + } + @Override Supplier emptyBucketBuilder(long owningBucketOrd) { return () -> new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 788249fee1187..27f0b21d2767f 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -329,6 +329,65 @@ public void testStringShardMinDocCount() throws IOException { } } + public void testStringShardZeroMinDocCount() throws IOException { + MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", true, true, Collections.emptyMap()); + for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").field("string") + .executionHint(executionMode.toString()) + .size(2) + .minDocCount(0) + .executionHint("map") + .excludeDeletedDocs(true) + .order(BucketOrder.key(true)); + + { + boolean delete = 
randomBoolean(); + // force single shard/segment + testCase(iw -> { + // force single shard/segment + iw.addDocuments(Arrays.asList(doc(fieldType, "a"), doc(fieldType, "b"), doc(fieldType, "c"), doc(fieldType, "d"))); + if (delete) { + iw.deleteDocuments(new TermQuery(new Term("string", "b"))); + } + }, (InternalTerms result) -> { + assertEquals(2, result.getBuckets().size()); + assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(0L, result.getBuckets().get(0).getDocCount()); + if (delete) { + assertEquals("c", result.getBuckets().get(1).getKeyAsString()); + } else { + assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + } + assertEquals(0L, result.getBuckets().get(1).getDocCount()); + }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))); + } + + { + boolean delete = randomBoolean(); + // force single shard/segment + testCase(iw -> { + // force single shard/segment + iw.addDocuments( + Arrays.asList(doc(fieldType, "a"), doc(fieldType, "c", "d"), doc(fieldType, "b", "d"), doc(fieldType, "b")) + ); + if (delete) { + iw.deleteDocuments(new TermQuery(new Term("string", "b"))); + } + }, (InternalTerms result) -> { + assertEquals(2, result.getBuckets().size()); + assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(0L, result.getBuckets().get(0).getDocCount()); + if (delete) { + assertEquals("c", result.getBuckets().get(1).getKeyAsString()); + } else { + assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + } + assertEquals(0L, result.getBuckets().get(1).getDocCount()); + }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))); + } + } + } + public void testManyTerms() throws Exception { MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", randomBoolean(), true, Collections.emptyMap()); TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").executionHint(randomHint()).field("string"); diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityTests.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityTests.java index 704d8b75d9ed3..c0866fa7ea694 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityTests.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityTests.java @@ -1013,6 +1013,10 @@ public void testZeroMinDocAggregation() throws Exception { prepareIndex("test").setId("2").setSource("color", "yellow", "fruit", "banana", "count", -2).setRefreshPolicy(IMMEDIATE).get(); prepareIndex("test").setId("3").setSource("color", "green", "fruit", "grape", "count", -3).setRefreshPolicy(IMMEDIATE).get(); prepareIndex("test").setId("4").setSource("color", "red", "fruit", "grape", "count", -4).setRefreshPolicy(IMMEDIATE).get(); + prepareIndex("test").setId("5") + .setSource("color", new String[] { "green", "black" }, "fruit", "grape", "count", -5) + .setRefreshPolicy(IMMEDIATE) + .get(); indicesAdmin().prepareForceMerge("test").get(); assertResponse( From 17274323eb584482331b152ade3fa8d2889546ea Mon Sep 17 00:00:00 2001 From: Najwa Harif <90753689+naj-h@users.noreply.github.com> Date: Thu, 18 Jul 2024 16:20:15 +0200 Subject: [PATCH 62/65] Make skip_unavailable=true a notable change instead of breaking change (#110983) --- docs/changelog/105792.yaml | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/105792.yaml b/docs/changelog/105792.yaml index 2ad5aa970c214..b9190e60cc96d 100644 --- a/docs/changelog/105792.yaml +++ b/docs/changelog/105792.yaml @@ -15,4 +15,4 @@ breaking: as SKIPPED in the search response metadata section and do not fail the entire search. If users want to ensure that a search returns a failure when a particular remote cluster is not available, `skip_unavailable` must be now be set explicitly. - notable: false + notable: true From e33a8dc686ff6c7fbb888d766d759ca31771718f Mon Sep 17 00:00:00 2001 From: Niels Bauman <33722607+nielsbauman@users.noreply.github.com> Date: Thu, 18 Jul 2024 17:54:42 +0200 Subject: [PATCH 63/65] Fix allowed warning during template creation in YAML test (#111059) Fixes #111041 --- .../test/data_stream/190_failure_store_redirection.yml | 2 +- muted-tests.yml | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/190_failure_store_redirection.yml b/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/190_failure_store_redirection.yml index 620120bbe7ee7..54ce32eb13207 100644 --- a/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/190_failure_store_redirection.yml +++ b/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/190_failure_store_redirection.yml @@ -250,7 +250,7 @@ teardown: - do: allowed_warnings: - - "index template [destination_template] has index patterns [destination*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [destination_template] will take precedence during new index creation" + - "index template [destination_template] has index patterns [destination-data-stream] matching patterns from existing older templates [global] with patterns (global => [*]); this template [destination_template] will take precedence during new index creation" indices.put_index_template: name: destination_template body: diff --git a/muted-tests.yml b/muted-tests.yml index 718505effd4f6..0c98d2bf051c9 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -102,9 +102,6 @@ tests: - class: org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderIT method: testEnterpriseDownloaderTask issue: https://github.com/elastic/elasticsearch/issues/111002 -- class: org.elasticsearch.datastreams.DataStreamsClientYamlTestSuiteIT - method: test {p0=data_stream/190_failure_store_redirection/Ensure failure is redirected to correct failure store after a reroute processor} - issue: https://github.com/elastic/elasticsearch/issues/111041 - class: org.elasticsearch.compute.lucene.ValueSourceReaderTypeConversionTests method: testLoadAllStatusAllInOnePage issue: https://github.com/elastic/elasticsearch/issues/111048 From 548aea56ce48e95cf79a201d75a784bb9f17d45c Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 18 Jul 2024 18:43:23 +0200 Subject: [PATCH 64/65] ESQL: Skip retrofitted tests (#111019) --- muted-tests.yml | 5 ----- .../src/main/resources/enrich.csv-spec | 13 ++++++------- .../testFixtures/src/main/resources/eval.csv-spec | 2 +- .../testFixtures/src/main/resources/keep.csv-spec | 10 +++++----- .../src/main/resources/stats.csv-spec | 15 ++++++--------- .../xpack/esql/action/EsqlCapabilities.java | 5 ----- 6 files changed, 18 insertions(+), 32 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 0c98d2bf051c9..e429bf0e1b6bf 100644 --- 
a/muted-tests.yml +++ b/muted-tests.yml @@ -94,11 +94,6 @@ tests: - class: org.elasticsearch.nativeaccess.VectorSystemPropertyTests method: testSystemPropertyDisabled issue: https://github.com/elastic/elasticsearch/issues/110949 -- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT - method: "test {stats.Count_or_null SYNC #2}" - issue: https://github.com/elastic/elasticsearch/issues/110950 -- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT - issue: https://github.com/elastic/elasticsearch/issues/110978 - class: org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderIT method: testEnterpriseDownloaderTask issue: https://github.com/elastic/elasticsearch/issues/111002 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index cf32e028b23bc..ab2ddb84ed969 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -69,7 +69,7 @@ ROW left = "left", foo = "foo", client_ip = "172.21.0.5", env = "env", right = " left:keyword | client_ip:keyword | env:keyword | right:keyword | foo:keyword ; -shadowingSubfields +shadowingSubfields#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] required_capability: enrich_load FROM addresses | KEEP city.country.continent.planet.name, city.country.name, city.name @@ -84,8 +84,7 @@ United States of America | South San Francisco | San Francisco Int'l Japan | Tokyo | null ; -shadowingSubfieldsLimit0 -required_capability: enrich_load +shadowingSubfieldsLimit0#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] FROM addresses | KEEP city.country.continent.planet.name, city.country.name, city.name | EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") @@ -135,7 +134,7 @@ ROW left = "left", airport = "Zurich Airport ZRH", city = "Zürich", middle = "m left:keyword | city:keyword | middle:keyword | right:keyword | airport:text | region:text | city_boundary:geo_shape ; -shadowingInternal +shadowingInternal#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] required_capability: enrich_load ROW city = "Zürich" | ENRICH city_names ON city WITH x = airport, x = region @@ -145,7 +144,7 @@ city:keyword | x:text Zürich | Bezirk Zürich ; -shadowingInternalImplicit +shadowingInternalImplicit#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] required_capability: enrich_load ROW city = "Zürich" | ENRICH city_names ON city WITH airport = region @@ -155,7 +154,7 @@ city:keyword | airport:text Zürich | Bezirk Zürich ; -shadowingInternalImplicit2 +shadowingInternalImplicit2#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] required_capability: enrich_load ROW city = "Zürich" | ENRICH city_names ON city WITH airport, airport = region @@ -165,7 +164,7 @@ city:keyword | airport:text Zürich | Bezirk Zürich ; -shadowingInternalImplicit3 +shadowingInternalImplicit3#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] required_capability: enrich_load ROW city = "Zürich" | ENRICH city_names ON city WITH airport = region, airport diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec index 87f54fbf0f174..770358e5120da 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec @@ -15,7 +15,7 @@ left:keyword | right:keyword | x:integer left | right | 1 ; 
-shadowingSubfields +shadowingSubfields#[skip:-8.13.3,reason:fixed in 8.13] FROM addresses | KEEP city.country.continent.planet.name, city.country.name, city.name | EVAL city.country.continent.planet.name = to_upper(city.country.continent.planet.name) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec index bcce35eb81e0f..6bc534a9fd918 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec @@ -540,7 +540,7 @@ c:i 1 ; -shadowingInternal +shadowingInternal#[skip:-8.13.3,reason:fixed in 8.13] FROM employees | SORT emp_no ASC | KEEP last_name, emp_no, last_name @@ -552,7 +552,7 @@ emp_no:integer | last_name:keyword 10002 | Simmel ; -shadowingInternalWildcard +shadowingInternalWildcard#[skip:-8.13.3,reason:fixed in 8.13] FROM employees | SORT emp_no ASC | KEEP last*name, emp_no, last*name, first_name, last*, gender, last* @@ -564,7 +564,7 @@ emp_no:integer | first_name:keyword | gender:keyword | last_name:keyword 10002 | Bezalel | F | Simmel ; -shadowingInternalWildcardAndExplicit +shadowingInternalWildcardAndExplicit#[skip:-8.13.3,reason:fixed in 8.13] FROM employees | SORT emp_no ASC | KEEP last*name, emp_no, last_name, first_name, last*, languages, last_name, gender, last*name @@ -576,7 +576,7 @@ emp_no:integer | first_name:keyword | languages:integer | last_name:keyword | ge 10002 | Bezalel | 5 | Simmel | F ; -shadowingSubfields +shadowingSubfields#[skip:-8.13.3,reason:fixed in 8.13] FROM addresses | KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name, city.country.continent.planet.name | SORT city.name @@ -588,7 +588,7 @@ North America | United States of America | San Francisco Asia | Japan | Tokyo | Earth ; -shadowingSubfieldsWildcard +shadowingSubfieldsWildcard#[skip:-8.13.3,reason:fixed in 8.13] FROM addresses | KEEP *name, city.country.continent.planet.name | SORT city.name diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index b64dcf7bf5ca4..96d0de9bef2b1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -1158,7 +1158,7 @@ word_count:long // end::docsCountWithExpression-result[] ; -count_or_null +count_where#[skip:-8.12.1,reason:implemented in 8.12] // tag::count-where[] ROW n=1 | WHERE n < 0 @@ -1173,7 +1173,7 @@ COUNT(n):long ; -count_or_null +count_or_null#[skip:-8.14.1,reason:implemented in 8.14] // tag::count-or-null[] ROW n=1 | STATS COUNT(n > 0 OR NULL), COUNT(n < 0 OR NULL) @@ -1895,7 +1895,7 @@ x:integer 10001 ; -shadowingInternalWithGroup +shadowingInternalWithGroup#[skip:-8.14.1,reason:implemented in 8.14] FROM employees | STATS x = MAX(emp_no), x = MIN(emp_no) BY x = gender | SORT x ASC @@ -1947,8 +1947,7 @@ MIN(i):integer | a:keyword | b:integer // end::multi-mv-group-result[] ; -statsByConstant -required_capability: stats_by_constant +statsByConstant#[skip:-8.14.1,reason:implemented in 8.14] from employees | stats m = max(salary), a = round(avg(salary)) by 0 ; @@ -1957,8 +1956,7 @@ m:integer |a:double |0:integer 74999 |48249.0 |0 ; -statsByConstantFromStats -required_capability: stats_by_constant +statsByConstantFromStats#[skip:-8.12.1,reason:implemented in 8.12] from employees | stats c = count(languages) | stats a = 
count(*) by c @@ -1968,8 +1966,7 @@ a:long |c:long 1 |90 ; -statsByConstantFromEval -required_capability: stats_by_constant +statsByConstantFromEval#[skip:-8.14.1,reason:implemented in 8.14] from employees | eval x = 0 | stats m = max(salary), a = round(avg(salary)) by x diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 98c6d8f4332be..477ce8bddd531 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -156,11 +156,6 @@ public enum Cap { */ RANGEQUERY_FOR_DATETIME, - /** - * Add tests for #105383, STATS BY constant. - */ - STATS_BY_CONSTANT, - /** * Fix for non-unique attribute names in ROW and logical plans. * https://github.com/elastic/elasticsearch/issues/110541 From 006f36eaf3f61ed09a736885807747fa49837dfe Mon Sep 17 00:00:00 2001 From: Pat Whelan Date: Thu, 18 Jul 2024 12:46:01 -0400 Subject: [PATCH 65/65] [Transform] Allow task canceling of validate API calls (#110951) Validate will initiate a search request. In the event that the search request needs to be cancelled, rather than manually stopping the task, cancelling the Validate task will now propagate the cancel request to the Search task. Relate #88010 Co-authored-by: Elastic Machine --- docs/changelog/110951.yaml | 5 +++++ .../transform/action/PutTransformAction.java | 9 +++++++++ .../transform/action/StartTransformAction.java | 9 +++++++++ .../action/ValidateTransformAction.java | 8 ++++++++ .../action/TransportPutTransformAction.java | 6 +++++- .../action/TransportStartTransformAction.java | 18 +++++++++++------- .../TransportValidateTransformAction.java | 14 ++++++++------ .../rest/action/RestPutTransformAction.java | 7 ++++++- .../rest/action/RestStartTransformAction.java | 7 ++++++- 9 files changed, 67 insertions(+), 16 deletions(-) create mode 100644 docs/changelog/110951.yaml diff --git a/docs/changelog/110951.yaml b/docs/changelog/110951.yaml new file mode 100644 index 0000000000000..ec8bc9cae6347 --- /dev/null +++ b/docs/changelog/110951.yaml @@ -0,0 +1,5 @@ +pr: 110951 +summary: Allow task canceling of validate API calls +area: Transform +type: bug +issues: [] diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/PutTransformAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/PutTransformAction.java index 496e826651572..f9fde6b6816e0 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/PutTransformAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/PutTransformAction.java @@ -14,6 +14,9 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.tasks.CancellableTask; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.tasks.TaskId; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xpack.core.common.validation.SourceDestValidator; import org.elasticsearch.xpack.core.transform.TransformField; @@ -22,6 +25,7 @@ import org.elasticsearch.xpack.core.transform.utils.TransformStrings; import java.io.IOException; +import java.util.Map; import java.util.Objects; import static 
org.elasticsearch.action.ValidateActions.addValidationError; @@ -154,6 +158,11 @@ public boolean equals(Object obj) { && this.deferValidation == other.deferValidation && ackTimeout().equals(other.ackTimeout()); } + + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + return new CancellableTask(id, type, action, getDescription(), parentTaskId, headers); + } } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/StartTransformAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/StartTransformAction.java index 838a0650c8afa..f02aaf553b8a9 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/StartTransformAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/StartTransformAction.java @@ -14,6 +14,9 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.tasks.CancellableTask; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.tasks.TaskId; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xpack.core.transform.TransformField; @@ -22,6 +25,7 @@ import java.io.IOException; import java.time.Instant; import java.util.Collections; +import java.util.Map; import java.util.Objects; public class StartTransformAction extends ActionType { @@ -89,6 +93,11 @@ public boolean equals(Object obj) { // the base class does not implement equals, therefore we need to check timeout ourselves return Objects.equals(id, other.id) && Objects.equals(from, other.from) && ackTimeout().equals(other.ackTimeout()); } + + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + return new CancellableTask(id, type, action, getDescription(), parentTaskId, headers); + } } public static class Response extends BaseTasksResponse implements ToXContentObject { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/ValidateTransformAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/ValidateTransformAction.java index 55c21b91b11d8..eae7d8a909c35 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/ValidateTransformAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/transform/action/ValidateTransformAction.java @@ -14,6 +14,9 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.tasks.CancellableTask; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.tasks.TaskId; import org.elasticsearch.xpack.core.common.validation.SourceDestValidator; import org.elasticsearch.xpack.core.transform.transforms.TransformConfig; @@ -94,6 +97,11 @@ public int hashCode() { // the base class does not implement hashCode, therefore we need to hash timeout ourselves return Objects.hash(ackTimeout(), config, deferValidation); } + + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + return new CancellableTask(id, type, action, getDescription(), parentTaskId, headers); + } } public static class Response extends ActionResponse { diff --git 
a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportPutTransformAction.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportPutTransformAction.java index 4c978b1504a0f..ef42a2781962a 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportPutTransformAction.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportPutTransformAction.java @@ -15,6 +15,7 @@ import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.action.support.master.AcknowledgedTransportMasterNodeAction; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.client.internal.ParentTaskAssigningClient; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.block.ClusterBlockLevel; @@ -25,6 +26,7 @@ import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.persistent.PersistentTasksCustomMetadata; import org.elasticsearch.tasks.Task; +import org.elasticsearch.tasks.TaskId; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.ClientHelper; @@ -110,9 +112,11 @@ protected void masterOperation(Task task, Request request, ClusterState clusterS ); // <2> Validate source and destination indices + + var parentTaskId = new TaskId(clusterService.localNode().getId(), task.getId()); ActionListener checkPrivilegesListener = validateTransformListener.delegateFailureAndWrap( (l, aVoid) -> ClientHelper.executeAsyncWithOrigin( - client, + new ParentTaskAssigningClient(client, parentTaskId), ClientHelper.TRANSFORM_ORIGIN, ValidateTransformAction.INSTANCE, new ValidateTransformAction.Request(config, request.isDeferValidation(), request.ackTimeout()), diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStartTransformAction.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStartTransformAction.java index 23212636dc33c..59df3fa67074d 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStartTransformAction.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStartTransformAction.java @@ -17,6 +17,7 @@ import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.master.TransportMasterNodeAction; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.client.internal.ParentTaskAssigningClient; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.block.ClusterBlockLevel; @@ -31,6 +32,7 @@ import org.elasticsearch.persistent.PersistentTasksService; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.tasks.Task; +import org.elasticsearch.tasks.TaskId; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.ClientHelper; @@ -126,23 +128,25 @@ protected TransportStartTransformAction( @Override protected void masterOperation( - Task ignoredTask, + Task task, StartTransformAction.Request request, ClusterState state, ActionListener listener ) { TransformNodes.warnIfNoTransformNodes(state); - final SetOnce 
transformTaskParamsHolder = new SetOnce<>(); - final SetOnce transformConfigHolder = new SetOnce<>(); + var transformTaskParamsHolder = new SetOnce(); + var transformConfigHolder = new SetOnce(); + var parentTaskId = new TaskId(clusterService.localNode().getId(), task.getId()); + var parentClient = new ParentTaskAssigningClient(client, parentTaskId); // <5> Wait for the allocated task's state to STARTED ActionListener> newPersistentTaskActionListener = ActionListener - .wrap(task -> { + .wrap(t -> { TransformTaskParams transformTask = transformTaskParamsHolder.get(); assert transformTask != null; waitForTransformTaskStarted( - task.getId(), + t.getId(), transformTask, request.ackTimeout(), ActionListener.wrap(taskStarted -> listener.onResponse(new StartTransformAction.Response(true)), listener::onFailure) @@ -196,7 +200,7 @@ protected void masterOperation( return; } TransformIndex.createDestinationIndex( - client, + parentClient, auditor, indexNameExpressionResolver, state, @@ -257,7 +261,7 @@ protected void masterOperation( ) ); ClientHelper.executeAsyncWithOrigin( - client, + parentClient, ClientHelper.TRANSFORM_ORIGIN, ValidateTransformAction.INSTANCE, new ValidateTransformAction.Request(config, false, request.ackTimeout()), diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportValidateTransformAction.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportValidateTransformAction.java index 71593d416577e..7041f18df1e4a 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportValidateTransformAction.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportValidateTransformAction.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.HandledTransportAction; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.client.internal.ParentTaskAssigningClient; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.node.DiscoveryNode; @@ -23,6 +24,7 @@ import org.elasticsearch.license.License; import org.elasticsearch.license.RemoteClusterLicenseChecker; import org.elasticsearch.tasks.Task; +import org.elasticsearch.tasks.TaskId; import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.common.validation.SourceDestValidator; import org.elasticsearch.xpack.core.transform.TransformDeprecations; @@ -30,8 +32,6 @@ import org.elasticsearch.xpack.core.transform.action.ValidateTransformAction; import org.elasticsearch.xpack.core.transform.action.ValidateTransformAction.Request; import org.elasticsearch.xpack.core.transform.action.ValidateTransformAction.Response; -import org.elasticsearch.xpack.core.transform.transforms.TransformConfig; -import org.elasticsearch.xpack.transform.transforms.Function; import org.elasticsearch.xpack.transform.transforms.FunctionFactory; import org.elasticsearch.xpack.transform.transforms.TransformNodes; import org.elasticsearch.xpack.transform.utils.SourceDestValidations; @@ -99,8 +99,10 @@ protected void doExecute(Task task, Request request, ActionListener li TransformNodes.warnIfNoTransformNodes(clusterState); - final TransformConfig config = request.getConfig(); - final Function function = FunctionFactory.create(config); + var config = request.getConfig(); + var function = 
FunctionFactory.create(config); + var parentTaskId = new TaskId(clusterService.localNode().getId(), task.getId()); + var parentClient = new ParentTaskAssigningClient(client, parentTaskId); if (config.getVersion() == null || config.getVersion().before(TransformDeprecations.MIN_TRANSFORM_VERSION)) { listener.onFailure( @@ -130,7 +132,7 @@ protected void doExecute(Task task, Request request, ActionListener li if (request.isDeferValidation()) { deduceMappingsListener.onResponse(emptyMap()); } else { - function.deduceMappings(client, config.getHeaders(), config.getId(), config.getSource(), deduceMappingsListener); + function.deduceMappings(parentClient, config.getHeaders(), config.getId(), config.getSource(), deduceMappingsListener); } }, listener::onFailure); @@ -139,7 +141,7 @@ protected void doExecute(Task task, Request request, ActionListener li if (request.isDeferValidation()) { validateQueryListener.onResponse(true); } else { - function.validateQuery(client, config.getHeaders(), config.getSource(), request.ackTimeout(), validateQueryListener); + function.validateQuery(parentClient, config.getHeaders(), config.getSource(), request.ackTimeout(), validateQueryListener); } }, listener::onFailure); diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/rest/action/RestPutTransformAction.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/rest/action/RestPutTransformAction.java index 78bcb9a12ffc0..e80d61589fed4 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/rest/action/RestPutTransformAction.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/rest/action/RestPutTransformAction.java @@ -15,6 +15,7 @@ import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.Scope; import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestCancellableNodeClient; import org.elasticsearch.rest.action.RestToXContentListener; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; @@ -66,6 +67,10 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient PutTransformAction.Request request = PutTransformAction.Request.fromXContent(parser, id, deferValidation, timeout); - return channel -> client.execute(PutTransformAction.INSTANCE, request, new RestToXContentListener<>(channel)); + return channel -> new RestCancellableNodeClient(client, restRequest.getHttpChannel()).execute( + PutTransformAction.INSTANCE, + request, + new RestToXContentListener<>(channel) + ); } } diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/rest/action/RestStartTransformAction.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/rest/action/RestStartTransformAction.java index fdfe2fe1744e7..9f2f310d7a9b9 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/rest/action/RestStartTransformAction.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/rest/action/RestStartTransformAction.java @@ -17,6 +17,7 @@ import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.Scope; import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestCancellableNodeClient; import org.elasticsearch.rest.action.RestToXContentListener; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xpack.core.transform.TransformField; @@ -45,7 +46,11 @@ 
protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient TimeValue timeout = restRequest.paramAsTime(TransformField.TIMEOUT.getPreferredName(), AcknowledgedRequest.DEFAULT_ACK_TIMEOUT); StartTransformAction.Request request = new StartTransformAction.Request(id, from, timeout); - return channel -> client.execute(StartTransformAction.INSTANCE, request, new RestToXContentListener<>(channel)); + return channel -> new RestCancellableNodeClient(client, restRequest.getHttpChannel()).execute( + StartTransformAction.INSTANCE, + request, + new RestToXContentListener<>(channel) + ); } private static Instant parseDateOrThrow(String date, ParseField paramName, LongSupplier now) {