From 8f5e2347bf6c9ed22cd4fa4cc648451fe88a5175 Mon Sep 17 00:00:00 2001
From: Jim Ferenczi
Date: Fri, 29 Nov 2024 23:56:17 +0000
Subject: [PATCH] iter

---
 .../index/mapper/DocumentParserContext.java | 15 +
 .../mapper/InferenceMetadataFieldsMapper.java | 1 +
 .../index/mapper/SourceFieldMapper.java | 6 +-
 .../elasticsearch/indices/IndicesModule.java | 2 -
 .../search/fetch/FetchContext.java | 10 +
 .../highlight/DefaultHighlighter.java | 2 +-
 x-pack/plugin/core/build.gradle | 1 -
 .../xpack/core/XPackClientPlugin.java | 15 -
 x-pack/plugin/inference/build.gradle | 1 +
 .../ShardBulkInferenceActionFilterIT.java | 2 +-
 .../inference/src/main/java/module-info.java | 1 +
 .../xpack/inference/InferenceFeatures.java | 16 +-
 .../xpack/inference/InferencePlugin.java | 49 +-
 .../ShardBulkInferenceActionFilter.java | 84 +-
 .../xpack/inference/chunking/Chunker.java | 4 +
 .../chunking/SentenceBoundaryChunker.java | 35 +-
 .../chunking/WordBoundaryChunker.java | 5 +
 .../highlight/SemanticTextHighlighter.java | 225 +
 .../mapper/AbstractSemanticTextFieldType.java | 30 +
 .../mapper/LegacySemanticTextField.java | 324 ++
 .../mapper/LegacySemanticTextFieldMapper.java | 817 ++++
 .../xpack/inference/mapper/OffsetField.java | 94 +
 .../mapper/OffsetSourceFieldMapper.java | 244 +
 .../mapper/OffsetSourceMetaFieldMapper.java | 78 +
 .../inference/mapper/SemanticTextField.java | 145 +-
 .../mapper/SemanticTextFieldMapper.java | 342 +-
 .../queries/SemanticQueryBuilder.java | 4 +-
 .../inference/queries/SparseVectorQuery.java | 72 +
 .../queries/SparseVectorQueryBuilder.java | 6 +-
 .../queries/TextExpansionQueryBuilder.java | 6 +-
 .../queries}/TokenPruningConfig.java | 4 +-
 .../queries}/WeightedTokensQueryBuilder.java | 5 +-
 .../queries}/WeightedTokensUtils.java | 14 +-
 .../ShardBulkInferenceActionFilterTests.java | 8 +-
 .../SemanticTextHighlighterTests.java | 572 +++
 .../LegacySemanticTextFieldMapperTests.java | 1227 +++++
 .../mapper/LegacySemanticTextFieldTests.java | 292 ++
 .../mapper/SemanticTextFieldMapperTests.java | 320 +-
 .../mapper/SemanticTextFieldTests.java | 58 +-
 .../queries/SemanticQueryBuilderTests.java | 28 +-
 .../SparseVectorQueryBuilderTests.java | 11 +-
 .../TextExpansionQueryBuilderTests.java | 8 +-
 .../queries}/TokenPruningConfigTests.java | 2 +-
 .../WeightedTokensQueryBuilderTests.java | 20 +-
 .../xpack/inference/highlight/moby-dick.json | 4339 +++++++++++++++++
 .../xpack/ml/MachineLearning.java | 19 -
 46 files changed, 8761 insertions(+), 802 deletions(-)
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/AbstractSemanticTextFieldType.java
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextField.java
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldMapper.java
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetField.java
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetSourceFieldMapper.java
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetSourceMetaFieldMapper.java
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SparseVectorQuery.java
 rename x-pack/plugin/{ml/src/main/java/org/elasticsearch/xpack/ml => inference/src/main/java/org/elasticsearch/xpack/inference}/queries/SparseVectorQueryBuilder.java (98%)
 rename x-pack/plugin/{ml/src/main/java/org/elasticsearch/xpack/ml => inference/src/main/java/org/elasticsearch/xpack/inference}/queries/TextExpansionQueryBuilder.java (98%)
 rename x-pack/plugin/{core/src/main/java/org/elasticsearch/xpack/core/ml/search => inference/src/main/java/org/elasticsearch/xpack/inference/queries}/TokenPruningConfig.java (98%)
 rename x-pack/plugin/{core/src/main/java/org/elasticsearch/xpack/core/ml/search => inference/src/main/java/org/elasticsearch/xpack/inference/queries}/WeightedTokensQueryBuilder.java (97%)
 rename x-pack/plugin/{core/src/main/java/org/elasticsearch/xpack/core/ml/search => inference/src/main/java/org/elasticsearch/xpack/inference/queries}/WeightedTokensUtils.java (89%)
 create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java
 create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldMapperTests.java
 create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldTests.java
 rename x-pack/plugin/{ml/src/test/java/org/elasticsearch/xpack/ml => inference/src/test/java/org/elasticsearch/xpack/inference}/queries/SparseVectorQueryBuilderTests.java (97%)
 rename x-pack/plugin/{ml/src/test/java/org/elasticsearch/xpack/ml => inference/src/test/java/org/elasticsearch/xpack/inference}/queries/TextExpansionQueryBuilderTests.java (97%)
 rename x-pack/plugin/{core/src/test/java/org/elasticsearch/xpack/core/ml/search => inference/src/test/java/org/elasticsearch/xpack/inference/queries}/TokenPruningConfigTests.java (96%)
 rename x-pack/plugin/{core/src/test/java/org/elasticsearch/xpack/core/ml/search => inference/src/test/java/org/elasticsearch/xpack/inference/queries}/WeightedTokensQueryBuilderTests.java (95%)
 create mode 100644 x-pack/plugin/inference/src/test/resources/org/elasticsearch/xpack/inference/highlight/moby-dick.json

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
index 83ac81c768269..d8ff772ce4dcf 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
@@ -65,6 +65,11 @@ public boolean isWithinInferenceMetadata() {
             return in.isWithinInferenceMetadata();
         }
 
+        @Override
+        public void markInferenceMetadata() {
+            in.markInferenceMetadata();
+        }
+
         @Override
         public ContentPath path() {
             return in.path();
@@ -150,6 +155,8 @@ private enum Scope {
     // Indicates if the source for this context has been marked to be recorded. Applies to synthetic source only.
     private boolean recordedSource;
 
+    private boolean inferenceMetadata;
+
     private DocumentParserContext(
         MappingLookup mappingLookup,
         MappingParserContext mappingParserContext,
@@ -342,6 +349,14 @@ public final DocumentParserContext addIgnoredFieldFromContext(IgnoredSourceField
         return this;
     }
 
+    public void markInferenceMetadata() {
+        this.inferenceMetadata = true;
+    }
+
+    public final boolean hasInferenceMetadata() {
+        return false;// TODO: inferenceMetadata;
+    }
+
     /**
      * Wraps {@link XContentDataHelper#encodeToken}, disabling dot expansion from {@link DotExpandingXContentParser}.
* This helps avoid producing duplicate names in the same scope, due to expanding dots to objects. diff --git a/server/src/main/java/org/elasticsearch/index/mapper/InferenceMetadataFieldsMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/InferenceMetadataFieldsMapper.java index 76638c362e549..1550f6298135b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/InferenceMetadataFieldsMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/InferenceMetadataFieldsMapper.java @@ -71,6 +71,7 @@ protected boolean supportsParsingObject() { protected void parseCreateField(DocumentParserContext context) throws IOException { XContentParser parser = context.parser(); XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); + context.markInferenceMetadata(); while (parser.nextToken() != XContentParser.Token.END_OBJECT) { XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); String fieldName = parser.currentName(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index fa30bca4ddb03..709cfd562af71 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java @@ -402,7 +402,7 @@ public boolean isComplete() { public void preParse(DocumentParserContext context) throws IOException { BytesReference originalSource = context.sourceToParse().source(); XContentType contentType = context.sourceToParse().getXContentType(); - final BytesReference adaptedSource = applyFilters(context.mappingLookup(), originalSource, contentType); + final BytesReference adaptedSource = applyFilters(context, originalSource, contentType); if (adaptedSource != null) { final BytesRef ref = adaptedSource.toBytesRef(); @@ -420,7 +420,7 @@ public void preParse(DocumentParserContext context) throws IOException { @Nullable public BytesReference applyFilters( - @Nullable MappingLookup mappingLookup, + @Nullable DocumentParserContext context, @Nullable BytesReference originalSource, @Nullable XContentType contentType ) throws IOException { @@ -428,7 +428,7 @@ public BytesReference applyFilters( return null; } var modSourceFilter = sourceFilter; - if (mappingLookup != null && mappingLookup.inferenceFields().isEmpty() == false) { + if (context != null && context.hasInferenceMetadata()) { String[] modExcludes = new String[excludes != null ? 
excludes.length + 1 : 1]; if (excludes != null) { System.arraycopy(excludes, 0, modExcludes, 0, excludes.length); diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index 1ca4aee887c1b..340bff4e1c852 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -42,7 +42,6 @@ import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.index.mapper.IndexFieldMapper; import org.elasticsearch.index.mapper.IndexModeFieldMapper; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.IpFieldMapper; import org.elasticsearch.index.mapper.IpScriptFieldType; import org.elasticsearch.index.mapper.KeywordFieldMapper; @@ -273,7 +272,6 @@ private static Map initBuiltInMetadataMa builtInMetadataMappers.put(SeqNoFieldMapper.NAME, SeqNoFieldMapper.PARSER); builtInMetadataMappers.put(DocCountFieldMapper.NAME, DocCountFieldMapper.PARSER); builtInMetadataMappers.put(DataStreamTimestampFieldMapper.NAME, DataStreamTimestampFieldMapper.PARSER); - builtInMetadataMappers.put(InferenceMetadataFieldsMapper.NAME, InferenceMetadataFieldsMapper.PARSER); // _field_names must be added last so that it has a chance to see all the other mappers builtInMetadataMappers.put(FieldNamesFieldMapper.NAME, FieldNamesFieldMapper.PARSER); return Collections.unmodifiableMap(builtInMetadataMappers); diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java index 0bbbff3a5d5f4..f8387cca6f87d 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java @@ -10,6 +10,7 @@ package org.elasticsearch.search.fetch; import org.apache.lucene.search.Query; +import org.elasticsearch.index.cache.bitset.BitsetFilterCache; import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.index.query.ParsedQuery; @@ -25,6 +26,7 @@ import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.internal.ContextIndexSearcher; import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.internal.ShardSearchRequest; import org.elasticsearch.search.lookup.Source; import org.elasticsearch.search.rank.RankBuilder; import org.elasticsearch.search.rescore.RescoreContext; @@ -87,6 +89,14 @@ private static StoredFieldsContext buildStoredFieldsContext(SearchContext in) { return sfc; } + public ShardSearchRequest request() { + return searchContext.request(); + } + + public BitsetFilterCache bitsetFilterCache() { + return searchContext.bitsetFilterCache(); + } + /** * The name of the index that documents are being fetched from */ diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index 75f8e5588761a..e98ad8a5bc572 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -235,7 +235,7 @@ protected static String convertFieldValue(MappedFieldType type, Object value) 
{ } } - protected static String mergeFieldValues(List fieldValues, char valuesSeparator) { + public static String mergeFieldValues(List fieldValues, char valuesSeparator) { // postings highlighter accepts all values in a single string, as offsets etc. need to match with content // loaded from stored fields, we merge all values using a proper separator String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(valuesSeparator)); diff --git a/x-pack/plugin/core/build.gradle b/x-pack/plugin/core/build.gradle index 51d770936e64e..f0eea64f9be6e 100644 --- a/x-pack/plugin/core/build.gradle +++ b/x-pack/plugin/core/build.gradle @@ -52,7 +52,6 @@ dependencies { api "commons-codec:commons-codec:${versions.commonscodec}" testImplementation project(path: ':modules:aggregations') testImplementation project(path: ':modules:data-streams') - testImplementation project(':modules:mapper-extras') // security deps api 'com.unboundid:unboundid-ldapsdk:6.0.3' diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index e2435c3396fa8..b84c1b6a48331 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -11,7 +11,6 @@ import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Setting; -import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.license.LicensesMetadata; import org.elasticsearch.persistent.PersistentTaskParams; import org.elasticsearch.persistent.PersistentTaskState; @@ -71,7 +70,6 @@ import org.elasticsearch.xpack.core.ml.job.config.JobTaskState; import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskParams; import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskState; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage; import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage; import org.elasticsearch.xpack.core.rollup.RollupField; @@ -392,17 +390,4 @@ public List getNamedXContent() { ) ); } - - // TODO: The WeightedTokensBuilder is slated for removal after the SparseVectorQueryBuilder is available. - // The logic to create a Boolean query based on weighted tokens will remain and/or be moved to server. 
- @Override - public List> getQueries() { - return List.of( - new SearchPlugin.QuerySpec( - WeightedTokensQueryBuilder.NAME, - WeightedTokensQueryBuilder::new, - WeightedTokensQueryBuilder::fromXContent - ) - ); - } } diff --git a/x-pack/plugin/inference/build.gradle b/x-pack/plugin/inference/build.gradle index 3c19e11a450b4..3e0ff7633267f 100644 --- a/x-pack/plugin/inference/build.gradle +++ b/x-pack/plugin/inference/build.gradle @@ -38,6 +38,7 @@ dependencies { testImplementation(testArtifact(project(':server'))) testImplementation(project(':x-pack:plugin:inference:qa:test-service-plugin')) testImplementation project(':modules:reindex') + testImplementation project(':modules:mapper-extras') clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin') api "com.ibm.icu:icu4j:${versions.icu4j}" diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java index 3b0fc869c8124..5bc578e69383d 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java @@ -36,7 +36,7 @@ import java.util.Map; import java.util.Set; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticTextInput; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextFieldTests.randomSemanticTextInput; import static org.hamcrest.Matchers.equalTo; public class ShardBulkInferenceActionFilterIT extends ESIntegTestCase { diff --git a/x-pack/plugin/inference/src/main/java/module-info.java b/x-pack/plugin/inference/src/main/java/module-info.java index 53974657e4e23..13f54d5d580bd 100644 --- a/x-pack/plugin/inference/src/main/java/module-info.java +++ b/x-pack/plugin/inference/src/main/java/module-info.java @@ -34,6 +34,7 @@ requires software.amazon.awssdk.retries.api; requires org.reactivestreams; requires org.elasticsearch.logging; + requires org.apache.lucene.highlighter; exports org.elasticsearch.xpack.inference.action; exports org.elasticsearch.xpack.inference.registry; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index c82f287792a7c..22980467a44ae 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -9,7 +9,7 @@ import org.elasticsearch.features.FeatureSpecification; import org.elasticsearch.features.NodeFeature; -import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; +import org.elasticsearch.xpack.inference.mapper.LegacySemanticTextFieldMapper; import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; import org.elasticsearch.xpack.inference.rank.random.RandomRankRetrieverBuilder; import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder; @@ -26,9 +26,9 @@ public Set getFeatures() { return Set.of( TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_RETRIEVER_SUPPORTED, 
RandomRankRetrieverBuilder.RANDOM_RERANKER_RETRIEVER_SUPPORTED, - SemanticTextFieldMapper.SEMANTIC_TEXT_SEARCH_INFERENCE_ID, + LegacySemanticTextFieldMapper.SEMANTIC_TEXT_SEARCH_INFERENCE_ID, SemanticQueryBuilder.SEMANTIC_TEXT_INNER_HITS, - SemanticTextFieldMapper.SEMANTIC_TEXT_DEFAULT_ELSER_2, + LegacySemanticTextFieldMapper.SEMANTIC_TEXT_DEFAULT_ELSER_2, TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_COMPOSITION_SUPPORTED ); } @@ -36,11 +36,11 @@ public Set getFeatures() { @Override public Set getTestFeatures() { return Set.of( - SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX, - SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX, - SemanticTextFieldMapper.SEMANTIC_TEXT_DELETE_FIX, - SemanticTextFieldMapper.SEMANTIC_TEXT_ZERO_SIZE_FIX, - SemanticTextFieldMapper.SEMANTIC_TEXT_ALWAYS_EMIT_INFERENCE_ID_FIX + LegacySemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX, + LegacySemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX, + LegacySemanticTextFieldMapper.SEMANTIC_TEXT_DELETE_FIX, + LegacySemanticTextFieldMapper.SEMANTIC_TEXT_ZERO_SIZE_FIX, + LegacySemanticTextFieldMapper.SEMANTIC_TEXT_ALWAYS_EMIT_INFERENCE_ID_FIX ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index 998dbe5fb007c..98509b78f6452 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -24,7 +24,10 @@ import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.TimeValue; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.index.mapper.MetadataFieldMapper; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.indices.SystemIndexDescriptor; import org.elasticsearch.inference.InferenceServiceExtension; import org.elasticsearch.inference.InferenceServiceRegistry; @@ -37,6 +40,7 @@ import org.elasticsearch.plugins.SystemIndexPlugin; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; +import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.rank.RankBuilder; import org.elasticsearch.search.rank.RankDoc; import org.elasticsearch.threadpool.ExecutorBuilder; @@ -67,9 +71,15 @@ import org.elasticsearch.xpack.inference.external.http.retry.RetrySettings; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; import org.elasticsearch.xpack.inference.external.http.sender.RequestExecutorServiceSettings; +import org.elasticsearch.xpack.inference.highlight.SemanticTextHighlighter; import org.elasticsearch.xpack.inference.logging.ThrottlerManager; +import org.elasticsearch.xpack.inference.mapper.OffsetSourceFieldMapper; +import org.elasticsearch.xpack.inference.mapper.OffsetSourceMetaFieldMapper; import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; +import org.elasticsearch.xpack.inference.queries.SparseVectorQueryBuilder; +import org.elasticsearch.xpack.inference.queries.TextExpansionQueryBuilder; +import org.elasticsearch.xpack.inference.queries.WeightedTokensQueryBuilder; import 
org.elasticsearch.xpack.inference.rank.random.RandomRankBuilder; import org.elasticsearch.xpack.inference.rank.random.RandomRankRetrieverBuilder; import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankBuilder; @@ -390,9 +400,24 @@ public void close() { IOUtils.closeWhileHandlingException(inferenceServiceRegistry.get(), throttlerToClose); } + @Override + public Map getMetadataMappers() { + return Map.of( + InferenceMetadataFieldsMapper.NAME, + InferenceMetadataFieldsMapper.PARSER, + OffsetSourceMetaFieldMapper.NAME, + OffsetSourceMetaFieldMapper.PARSER + ); + } + @Override public Map getMappers() { - return Map.of(SemanticTextFieldMapper.CONTENT_TYPE, SemanticTextFieldMapper.PARSER); + return Map.of( + SemanticTextFieldMapper.CONTENT_TYPE, + SemanticTextFieldMapper.PARSER, + OffsetSourceFieldMapper.CONTENT_TYPE, + OffsetSourceFieldMapper.PARSER + ); } @Override @@ -401,7 +426,22 @@ public Collection getMappedActionFilters() { } public List> getQueries() { - return List.of(new QuerySpec<>(SemanticQueryBuilder.NAME, SemanticQueryBuilder::new, SemanticQueryBuilder::fromXContent)); + return List.of( + new QuerySpec<>(SemanticQueryBuilder.NAME, SemanticQueryBuilder::new, SemanticQueryBuilder::fromXContent), + new QuerySpec<>(SparseVectorQueryBuilder.NAME, SparseVectorQueryBuilder::new, SparseVectorQueryBuilder::fromXContent), + new QuerySpec( + TextExpansionQueryBuilder.NAME, + TextExpansionQueryBuilder::new, + TextExpansionQueryBuilder::fromXContent + ), + // TODO: The WeightedTokensBuilder is slated for removal after the SparseVectorQueryBuilder is available. + // The logic to create a Boolean query based on weighted tokens will remain and/or be moved to server. + new QuerySpec( + WeightedTokensQueryBuilder.NAME, + WeightedTokensQueryBuilder::new, + WeightedTokensQueryBuilder::fromXContent + ) + ); } @Override @@ -411,4 +451,9 @@ public List> getRetrievers() { new RetrieverSpec<>(new ParseField(RandomRankBuilder.NAME), RandomRankRetrieverBuilder::fromXContent) ); } + + @Override + public Map getHighlighters() { + return Map.of(SemanticTextHighlighter.NAME, new SemanticTextHighlighter()); + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index 3200277c1e1d3..029a91ca208a7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -42,12 +42,13 @@ import org.elasticsearch.rest.RestStatus; import org.elasticsearch.tasks.Task; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; +import org.elasticsearch.xpack.inference.mapper.LegacySemanticTextField; +import org.elasticsearch.xpack.inference.mapper.LegacySemanticTextFieldMapper; import org.elasticsearch.xpack.inference.mapper.SemanticTextField; import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; import org.elasticsearch.xpack.inference.registry.ModelRegistry; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -56,8 +57,6 @@ import java.util.Map; import java.util.stream.Collectors; -import static 
org.elasticsearch.xpack.inference.mapper.SemanticTextField.toSemanticTextFieldChunks; - /** * A {@link MappedActionFilter} that intercepts {@link BulkShardRequest} to apply inference on fields specified * as {@link SemanticTextFieldMapper} in the index mapping. For each semantic text field referencing fields in @@ -375,8 +374,7 @@ private void addInferenceResponseFailure(int id, Exception failure) { /** * Applies the {@link FieldInferenceResponseAccumulator} to the provided {@link BulkItemRequest}. * If the response contains failures, the bulk item request is marked as failed for the downstream action. - * Otherwise, the source of the request is augmented with the field inference results under the - * {@link SemanticTextField#INFERENCE_FIELD} field. + * Otherwise, the source of the request is augmented with the field inference results. */ private void applyInferenceResponses(BulkItemRequest item, FieldInferenceResponseAccumulator response) { if (response.failures().isEmpty() == false) { @@ -396,22 +394,33 @@ private void applyInferenceResponses(BulkItemRequest item, FieldInferenceRespons var model = responses.get(0).model(); // ensure that the order in the original field is consistent in case of multiple inputs Collections.sort(responses, Comparator.comparingInt(FieldInferenceResponse::inputOrder)); - List inputs = responses.stream().filter(r -> r.isOriginalFieldInput).map(r -> r.input).collect(Collectors.toList()); + List inputs = responses.stream() + .filter(r -> r.field().equals(fieldName)) + .map(r -> r.input) + .collect(Collectors.toList()); + assert inputs.size() == 1; List results = responses.stream().map(r -> r.chunkedResults).collect(Collectors.toList()); - var result = new SemanticTextField( - fieldName, - inputs, - new SemanticTextField.InferenceResult( + if (addMetadataField) { + var result = new SemanticTextField( + fieldName, model.getInferenceEntityId(), new SemanticTextField.ModelSettings(model), - toSemanticTextFieldChunks(results, indexRequest.getContentType()) - ), - indexRequest.getContentType() - ); - if (addMetadataField) { + SemanticTextField.toSemanticTextFieldChunks(fieldName, inputs.get(0), results, indexRequest.getContentType()), + indexRequest.getContentType() + ); inferenceFieldsMap.put(fieldName, result); } else { - SemanticTextFieldMapper.insertValue(fieldName, newDocMap, result); + var result = new LegacySemanticTextField( + fieldName, + inputs, + new LegacySemanticTextField.InferenceResult( + model.getInferenceEntityId(), + new LegacySemanticTextField.ModelSettings(model), + LegacySemanticTextField.toSemanticTextFieldChunks(results, indexRequest.getContentType()) + ), + indexRequest.getContentType() + ); + LegacySemanticTextFieldMapper.insertValue(fieldName, newDocMap, result); } } if (addMetadataField) { @@ -489,17 +498,15 @@ private Map> createFieldInferenceRequests(Bu continue; } ensureResponseAccumulatorSlot(itemIndex); - final List values; + final String value; try { - values = nodeStringValues(field, valueObj); + value = SemanticTextField.nodeStringValues(field, valueObj); } catch (Exception exc) { addInferenceResponseFailure(item.id(), exc); break; } List fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>()); - for (var v : values) { - fieldRequests.add(new FieldInferenceRequest(itemIndex, field, v, order++, isOriginalFieldInput)); - } + fieldRequests.add(new FieldInferenceRequest(itemIndex, field, value, order++, isOriginalFieldInput)); } } } @@ -507,41 +514,6 @@ private Map> createFieldInferenceRequests(Bu } } - /** 
- * This method converts the given {@code valueObj} into a list of strings. - * If {@code valueObj} is not a string or a collection of strings, it throws an ElasticsearchStatusException. - */ - private static List nodeStringValues(String field, Object valueObj) { - if (valueObj instanceof Number || valueObj instanceof Boolean) { - return List.of(valueObj.toString()); - } else if (valueObj instanceof String value) { - return List.of(value); - } else if (valueObj instanceof Collection values) { - List valuesString = new ArrayList<>(); - for (var v : values) { - if (v instanceof Number || v instanceof Boolean) { - valuesString.add(v.toString()); - } else if (v instanceof String value) { - valuesString.add(value); - } else { - throw new ElasticsearchStatusException( - "Invalid format for field [{}], expected [String] got [{}]", - RestStatus.BAD_REQUEST, - field, - valueObj.getClass().getSimpleName() - ); - } - } - return valuesString; - } - throw new ElasticsearchStatusException( - "Invalid format for field [{}], expected [String] got [{}]", - RestStatus.BAD_REQUEST, - field, - valueObj.getClass().getSimpleName() - ); - } - static IndexRequest getIndexRequestOrNull(DocWriteRequest docWriteRequest) { if (docWriteRequest instanceof IndexRequest indexRequest) { return indexRequest; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/Chunker.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/Chunker.java index af7c706c807ec..01919ef19c6fd 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/Chunker.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/Chunker.java @@ -12,5 +12,9 @@ import java.util.List; public interface Chunker { + record Chunk(int startOffset, int endOffset) {} + List chunk(String input, ChunkingSettings chunkingSettings); + + List chunkOffset(String input, ChunkingSettings chunkingSettings); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunker.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunker.java index 5df940d6a3fba..c0315d0c56daf 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunker.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunker.java @@ -15,6 +15,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Locale; +import java.util.stream.Collectors; /** * Split text into chunks aligned on sentence boundaries. @@ -58,6 +59,20 @@ public List chunk(String input, ChunkingSettings chunkingSettings) { } } + @Override + public List chunkOffset(String input, ChunkingSettings chunkingSettings) { + if (chunkingSettings instanceof SentenceBoundaryChunkingSettings sentenceBoundaryChunkingSettings) { + return chunkOffset(input, sentenceBoundaryChunkingSettings.maxChunkSize, sentenceBoundaryChunkingSettings.sentenceOverlap > 0); + } else { + throw new IllegalArgumentException( + Strings.format( + "SentenceBoundaryChunker can't use ChunkingSettings with strategy [%s]", + chunkingSettings.getChunkingStrategy() + ) + ); + } + } + /** * Break the input text into small chunks on sentence boundaries. 
* @@ -66,7 +81,19 @@ public List chunk(String input, ChunkingSettings chunkingSettings) { * @return The input text chunked */ public List chunk(String input, int maxNumberWordsPerChunk, boolean includePrecedingSentence) { - var chunks = new ArrayList(); + var chunks = chunkOffset(input, maxNumberWordsPerChunk, includePrecedingSentence); + return chunks.stream().map(c -> input.substring(c.startOffset(), c.endOffset())).collect(Collectors.toList()); + } + + /** + * Break the input text into small chunks on sentence boundaries. + * + * @param input Text to chunk + * @param maxNumberWordsPerChunk Maximum size of the chunk + * @return The input text chunked + */ + public List chunkOffset(String input, int maxNumberWordsPerChunk, boolean includePrecedingSentence) { + var chunks = new ArrayList(); sentenceIterator.setText(input); wordIterator.setText(input); @@ -91,7 +118,7 @@ public List chunk(String input, int maxNumberWordsPerChunk, boolean incl int nextChunkWordCount = wordsInSentenceCount; if (chunkWordCount > 0) { // add a new chunk containing all the input up to this sentence - chunks.add(input.substring(chunkStart, chunkEnd)); + chunks.add(new Chunk(chunkStart, chunkEnd)); if (includePrecedingSentence) { if (wordsInPrecedingSentenceCount + wordsInSentenceCount > maxNumberWordsPerChunk) { @@ -127,7 +154,7 @@ public List chunk(String input, int maxNumberWordsPerChunk, boolean incl for (; i < sentenceSplits.size() - 1; i++) { // Because the substring was passed to splitLongSentence() // the returned positions need to be offset by chunkStart - chunks.add(input.substring(chunkStart + sentenceSplits.get(i).start(), chunkStart + sentenceSplits.get(i).end())); + chunks.add(new Chunk(chunkStart + sentenceSplits.get(i).start(), chunkStart + sentenceSplits.get(i).end())); } // The final split is partially filled. // Set the next chunk start to the beginning of the @@ -151,7 +178,7 @@ public List chunk(String input, int maxNumberWordsPerChunk, boolean incl } if (chunkWordCount > 0) { - chunks.add(input.substring(chunkStart)); + chunks.add(new Chunk(chunkStart, input.length())); } return chunks; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunker.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunker.java index c9c752b9aabbc..20ab2dbdd0fc9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunker.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunker.java @@ -55,6 +55,11 @@ public List chunk(String input, ChunkingSettings chunkingSettings) { } } + @Override + public List chunkOffset(String input, ChunkingSettings chunkingSettings) { + return List.of(); + } + /** * Break the input text into small chunks as dictated * by the chunking parameters diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java new file mode 100644 index 0000000000000..01723c5573ec2 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java @@ -0,0 +1,225 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.highlight;
+
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.KnnByteVectorQuery;
+import org.apache.lucene.search.KnnFloatVectorQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.MappingLookup;
+import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType;
+import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SparseVectorFieldType;
+import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
+import org.elasticsearch.search.vectors.VectorData;
+import org.elasticsearch.xpack.inference.mapper.OffsetSourceFieldMapper;
+import org.elasticsearch.xpack.inference.mapper.OffsetSourceMetaFieldMapper;
+import org.elasticsearch.xpack.inference.mapper.SemanticTextField;
+import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper;
+import org.elasticsearch.xpack.inference.queries.SparseVectorQuery;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class SemanticTextHighlighter implements Highlighter {
+    public static final String NAME = "semantic";
+
+    private record OffsetAndScore(String sourceField, int startOffset, int endOffset, float score) {}
+
+    @Override
+    public boolean canHighlight(MappedFieldType fieldType) {
+        if (fieldType instanceof SemanticTextFieldMapper.SemanticTextFieldType) {
+            return true;
+        }
+        return false;
+    }
+
+    @Override
+    public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
+        SemanticTextFieldMapper.SemanticTextFieldType fieldType = (SemanticTextFieldMapper.SemanticTextFieldType) fieldContext.fieldType;
+        if (fieldType.getEmbeddingsField() == null) {
+            // nothing indexed yet
+            return null;
+        }
+
+        final List<Query> queries = switch (fieldType.getModelSettings().taskType()) {
+            case SPARSE_EMBEDDING -> extractSparseVectorQueries(
+                (SparseVectorFieldType) fieldType.getEmbeddingsField().fieldType(),
+                fieldContext.query
+            );
+            case TEXT_EMBEDDING -> extractDenseVectorQueries(
+                (DenseVectorFieldType) fieldType.getEmbeddingsField().fieldType(),
+                fieldContext.query
+            );
+            default -> throw new IllegalStateException(
+                "Wrong task type for a semantic text field, got [" + fieldType.getModelSettings().taskType().name() + "]"
+            );
+        };
+        if (queries.isEmpty()) {
+            // nothing to highlight
+            return null;
+        }
+
+        int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments() == 0
+            ? 1
+            : fieldContext.field.fieldOptions().numberOfFragments();
+        var mappingLookup = fieldContext.context.getSearchExecutionContext().getMappingLookup();
+        var inferenceMetadata = mappingLookup.inferenceFields().get(fieldContext.fieldName);
+
+        List<OffsetAndScore> chunks = extractOffsetAndScores(
+            fieldContext.context.getSearchExecutionContext(),
+            fieldContext.hitContext.reader(),
+            fieldType,
+            fieldContext.hitContext.docId(),
+            queries
+        );
+
+        Map<String, String> inputs = extractContentFields(fieldContext.hitContext, mappingLookup, inferenceMetadata.getSourceFields());
+        chunks.sort(Comparator.comparingDouble(OffsetAndScore::score).reversed());
+        int size = Math.min(chunks.size(), numberOfFragments);
+        Text[] snippets = new Text[size];
+        for (int i = 0; i < size; i++) {
+            var chunk = chunks.get(i);
+            var content = inputs.get(chunk.sourceField);
+            snippets[i] = new Text(content.substring(chunk.startOffset, chunk.endOffset));
+        }
+        return new HighlightField(fieldContext.fieldName, snippets);
+    }
+
+    private Map<String, String> extractContentFields(
+        FetchSubPhase.HitContext hitContext,
+        MappingLookup mappingLookup,
+        String[] sourceFields
+    ) throws IOException {
+        Map<String, String> inputs = new HashMap<>();
+        for (String sourceField : sourceFields) {
+            var sourceFieldType = mappingLookup.getFieldType(sourceField);
+            if (sourceFieldType == null) {
+                continue;
+            }
+            Object sourceValue = hitContext.source().extractValue(sourceFieldType.name(), null);
+            if (sourceValue != null) {
+                inputs.put(sourceField, SemanticTextField.nodeStringValues(sourceFieldType.name(), sourceValue));
+            }
+        }
+        return inputs;
+    }
+
+    private List<OffsetAndScore> extractOffsetAndScores(
+        SearchExecutionContext context,
+        LeafReader reader,
+        SemanticTextFieldMapper.SemanticTextFieldType fieldType,
+        int docID,
+        List<Query> leafQueries
+    ) throws IOException {
+        var bitSet = context.bitsetFilter(fieldType.getChunksField().parentTypeFilter()).getBitSet(reader.getContext());
+        int previousParent = docID > 0 ? bitSet.prevSetBit(docID - 1) : -1;
+
+        BooleanQuery.Builder bq = new BooleanQuery.Builder();
+        leafQueries.stream().forEach(q -> bq.add(q, BooleanClause.Occur.SHOULD));
+        Weight weight = new IndexSearcher(reader).createWeight(bq.build(), ScoreMode.COMPLETE, 1);
+        Scorer scorer = weight.scorer(reader.getContext());
+        var terms = reader.terms(OffsetSourceMetaFieldMapper.NAME);
+        if (terms == null) {
+            // TODO: Empty terms
+            return List.of();
+        }
+        var offsetReader = new OffsetSourceFieldMapper.OffsetsReader(terms, fieldType.getOffsetsField().fullPath());
+        if (previousParent != -1) {
+            scorer.iterator().advance(previousParent);
+        } else if (scorer.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
+            return List.of();
+        }
+        List<OffsetAndScore> results = new ArrayList<>();
+        while (scorer.docID() < docID) {
+            if (offsetReader.advanceTo(scorer.docID()) == false) {
+                throw new IllegalStateException("Offsets not indexed?");
+            }
+            results.add(
+                new OffsetAndScore(
+                    offsetReader.getSourceFieldName(),
+                    offsetReader.getStartOffset(),
+                    offsetReader.getEndOffset(),
+                    scorer.score()
+                )
+            );
+            if (scorer.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
+                break;
+            }
+        }
+        return results;
+    }
+
+    private List<Query> extractDenseVectorQueries(DenseVectorFieldType fieldType, Query querySection) throws IOException {
+        // TODO: Handle knn section when semantic text field can be used.
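+        // The visitor below walks the query tree, accepting only clauses that target this field's embeddings,
+        // and rewrites each KnnFloatVectorQuery/KnnByteVectorQuery leaf into an exact kNN query via
+        // createExactKnnQuery, so that every chunk of the hit can be scored against the query vector.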
+ List queries = new ArrayList<>(); + querySection.visit(new QueryVisitor() { + @Override + public boolean acceptField(String field) { + return fieldType.name().equals(field); + } + + @Override + public void consumeTerms(Query query, Term... terms) { + super.consumeTerms(query, terms); + } + + @Override + public void visitLeaf(Query query) { + if (query instanceof KnnFloatVectorQuery knnQuery) { + queries.add(fieldType.createExactKnnQuery(VectorData.fromFloats(knnQuery.getTargetCopy()), null)); + } else if (query instanceof KnnByteVectorQuery knnQuery) { + queries.add(fieldType.createExactKnnQuery(VectorData.fromBytes(knnQuery.getTargetCopy()), null)); + } + } + }); + return queries; + } + + private List extractSparseVectorQueries(SparseVectorFieldType fieldType, Query querySection) throws IOException { + List queries = new ArrayList<>(); + querySection.visit(new QueryVisitor() { + @Override + public boolean acceptField(String field) { + return fieldType.name().equals(field); + } + + @Override + public void consumeTerms(Query query, Term... terms) { + super.consumeTerms(query, terms); + } + + @Override + public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) { + if (parent instanceof SparseVectorQuery sparseVectorQuery) { + queries.add(sparseVectorQuery.getTermsQuery()); + } + return this; + } + }); + return queries; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/AbstractSemanticTextFieldType.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/AbstractSemanticTextFieldType.java new file mode 100644 index 0000000000000..ec601d6162e78 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/AbstractSemanticTextFieldType.java @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.mapper; + +import org.elasticsearch.index.mapper.SimpleMappedFieldType; +import org.elasticsearch.index.mapper.TextSearchInfo; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.inference.InferenceResults; + +import java.util.Map; + +public abstract class AbstractSemanticTextFieldType extends SimpleMappedFieldType { + protected AbstractSemanticTextFieldType( + String name, + boolean isIndexed, + boolean isStored, + boolean hasDocValues, + TextSearchInfo textSearchInfo, + Map meta + ) { + super(name, isIndexed, isStored, hasDocValues, textSearchInfo, meta); + } + + public abstract QueryBuilder semanticQuery(InferenceResults inferenceResults, Integer requestSize, float boost, String queryName); +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextField.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextField.java new file mode 100644 index 0000000000000..80fc21d68ff85 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextField.java @@ -0,0 +1,324 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.mapper; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.inference.ChunkedInferenceServiceResults; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.DeprecationHandler; +import org.elasticsearch.xcontent.NamedXContentRegistry; +import org.elasticsearch.xcontent.ObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentParserConfiguration; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.support.MapXContentParser; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING; +import static org.elasticsearch.inference.TaskType.TEXT_EMBEDDING; +import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; + +/** + * A {@link ToXContentObject} that is used to represent the transformation of the semantic text field's inputs. + * The resulting object preserves the original input under the {@link LegacySemanticTextField#TEXT_FIELD} and exposes + * the inference results under the {@link LegacySemanticTextField#INFERENCE_FIELD}. + * + * @param fieldName The original field name. + * @param originalValues The original values associated with the field name. + * @param inference The inference result. + * @param contentType The {@link XContentType} used to store the embeddings chunks. 
+ */ +public record LegacySemanticTextField(String fieldName, List originalValues, InferenceResult inference, XContentType contentType) + implements + ToXContentObject { + + static final String TEXT_FIELD = "text"; + static final String INFERENCE_FIELD = "inference"; + static final String INFERENCE_ID_FIELD = "inference_id"; + static final String SEARCH_INFERENCE_ID_FIELD = "search_inference_id"; + static final String CHUNKS_FIELD = "chunks"; + static final String CHUNKED_EMBEDDINGS_FIELD = "embeddings"; + static final String CHUNKED_TEXT_FIELD = "text"; + static final String MODEL_SETTINGS_FIELD = "model_settings"; + static final String TASK_TYPE_FIELD = "task_type"; + static final String DIMENSIONS_FIELD = "dimensions"; + static final String SIMILARITY_FIELD = "similarity"; + static final String ELEMENT_TYPE_FIELD = "element_type"; + + public record InferenceResult(String inferenceId, ModelSettings modelSettings, List chunks) {} + + public record Chunk(String text, BytesReference rawEmbeddings) {} + + public record ModelSettings( + TaskType taskType, + Integer dimensions, + SimilarityMeasure similarity, + DenseVectorFieldMapper.ElementType elementType + ) implements ToXContentObject { + public ModelSettings(Model model) { + this( + model.getTaskType(), + model.getServiceSettings().dimensions(), + model.getServiceSettings().similarity(), + model.getServiceSettings().elementType() + ); + } + + public ModelSettings( + TaskType taskType, + Integer dimensions, + SimilarityMeasure similarity, + DenseVectorFieldMapper.ElementType elementType + ) { + this.taskType = Objects.requireNonNull(taskType, "task type must not be null"); + this.dimensions = dimensions; + this.similarity = similarity; + this.elementType = elementType; + validate(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(TASK_TYPE_FIELD, taskType.toString()); + if (dimensions != null) { + builder.field(DIMENSIONS_FIELD, dimensions); + } + if (similarity != null) { + builder.field(SIMILARITY_FIELD, similarity); + } + if (elementType != null) { + builder.field(ELEMENT_TYPE_FIELD, elementType); + } + return builder.endObject(); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("task_type=").append(taskType); + if (dimensions != null) { + sb.append(", dimensions=").append(dimensions); + } + if (similarity != null) { + sb.append(", similarity=").append(similarity); + } + if (elementType != null) { + sb.append(", element_type=").append(elementType); + } + return sb.toString(); + } + + private void validate() { + switch (taskType) { + case TEXT_EMBEDDING: + validateFieldPresent(DIMENSIONS_FIELD, dimensions); + validateFieldPresent(SIMILARITY_FIELD, similarity); + validateFieldPresent(ELEMENT_TYPE_FIELD, elementType); + break; + case SPARSE_EMBEDDING: + validateFieldNotPresent(DIMENSIONS_FIELD, dimensions); + validateFieldNotPresent(SIMILARITY_FIELD, similarity); + validateFieldNotPresent(ELEMENT_TYPE_FIELD, elementType); + break; + + default: + throw new IllegalArgumentException( + "Wrong [" + + TASK_TYPE_FIELD + + "], expected " + + TEXT_EMBEDDING + + " or " + + SPARSE_EMBEDDING + + ", got " + + taskType.name() + ); + } + } + + private void validateFieldPresent(String field, Object fieldValue) { + if (fieldValue == null) { + throw new IllegalArgumentException("required [" + field + "] field is missing for task_type [" + taskType.name() + "]"); + } + } + + private void 
validateFieldNotPresent(String field, Object fieldValue) { + if (fieldValue != null) { + throw new IllegalArgumentException("[" + field + "] is not allowed for task_type [" + taskType.name() + "]"); + } + } + } + + public static String getOriginalTextFieldName(String fieldName) { + return fieldName + "." + TEXT_FIELD; + } + + public static String getInferenceFieldName(String fieldName) { + return fieldName + "." + INFERENCE_FIELD; + } + + public static String getChunksFieldName(String fieldName) { + return getInferenceFieldName(fieldName) + "." + CHUNKS_FIELD; + } + + public static String getEmbeddingsFieldName(String fieldName) { + return getChunksFieldName(fieldName) + "." + CHUNKED_EMBEDDINGS_FIELD; + } + + static LegacySemanticTextField parse(XContentParser parser, Tuple context) throws IOException { + return SEMANTIC_TEXT_FIELD_PARSER.parse(parser, context); + } + + static ModelSettings parseModelSettings(XContentParser parser) throws IOException { + return MODEL_SETTINGS_PARSER.parse(parser, null); + } + + static ModelSettings parseModelSettingsFromMap(Object node) { + if (node == null) { + return null; + } + try { + Map map = XContentMapValues.nodeMapValue(node, MODEL_SETTINGS_FIELD); + XContentParser parser = new MapXContentParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + map, + XContentType.JSON + ); + return parseModelSettings(parser); + } catch (Exception exc) { + throw new ElasticsearchException(exc); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (originalValues.isEmpty() == false) { + builder.field(TEXT_FIELD, originalValues.size() == 1 ? originalValues.get(0) : originalValues); + } + builder.startObject(INFERENCE_FIELD); + builder.field(INFERENCE_ID_FIELD, inference.inferenceId); + builder.field(MODEL_SETTINGS_FIELD, inference.modelSettings); + builder.startArray(CHUNKS_FIELD); + for (var chunk : inference.chunks) { + builder.startObject(); + builder.field(CHUNKED_TEXT_FIELD, chunk.text); + XContentParser parser = XContentHelper.createParserNotCompressed( + XContentParserConfiguration.EMPTY, + chunk.rawEmbeddings, + contentType + ); + builder.field(CHUNKED_EMBEDDINGS_FIELD).copyCurrentStructure(parser); + builder.endObject(); + } + builder.endArray(); + builder.endObject(); + builder.endObject(); + return builder; + } + + @SuppressWarnings("unchecked") + private static final ConstructingObjectParser> SEMANTIC_TEXT_FIELD_PARSER = + new ConstructingObjectParser<>( + SemanticTextFieldMapper.CONTENT_TYPE, + true, + (args, context) -> new LegacySemanticTextField( + context.v1(), + (List) (args[0] == null ? List.of() : args[0]), + (InferenceResult) args[1], + context.v2() + ) + ); + + @SuppressWarnings("unchecked") + private static final ConstructingObjectParser INFERENCE_RESULT_PARSER = new ConstructingObjectParser<>( + INFERENCE_FIELD, + true, + args -> new InferenceResult((String) args[0], (ModelSettings) args[1], (List) args[2]) + ); + + private static final ConstructingObjectParser CHUNKS_PARSER = new ConstructingObjectParser<>( + CHUNKS_FIELD, + true, + args -> new Chunk((String) args[0], (BytesReference) args[1]) + ); + + private static final ConstructingObjectParser MODEL_SETTINGS_PARSER = new ConstructingObjectParser<>( + MODEL_SETTINGS_FIELD, + true, + args -> { + TaskType taskType = TaskType.fromString((String) args[0]); + Integer dimensions = (Integer) args[1]; + SimilarityMeasure similarity = args[2] == null ? 
null : SimilarityMeasure.fromString((String) args[2]); + DenseVectorFieldMapper.ElementType elementType = args[3] == null + ? null + : DenseVectorFieldMapper.ElementType.fromString((String) args[3]); + return new ModelSettings(taskType, dimensions, similarity, elementType); + } + ); + + static { + SEMANTIC_TEXT_FIELD_PARSER.declareStringArray(optionalConstructorArg(), new ParseField(TEXT_FIELD)); + SEMANTIC_TEXT_FIELD_PARSER.declareObject( + constructorArg(), + (p, c) -> INFERENCE_RESULT_PARSER.parse(p, null), + new ParseField(INFERENCE_FIELD) + ); + + INFERENCE_RESULT_PARSER.declareString(constructorArg(), new ParseField(INFERENCE_ID_FIELD)); + INFERENCE_RESULT_PARSER.declareObject(constructorArg(), MODEL_SETTINGS_PARSER, new ParseField(MODEL_SETTINGS_FIELD)); + INFERENCE_RESULT_PARSER.declareObjectArray(constructorArg(), CHUNKS_PARSER, new ParseField(CHUNKS_FIELD)); + + CHUNKS_PARSER.declareString(constructorArg(), new ParseField(CHUNKED_TEXT_FIELD)); + CHUNKS_PARSER.declareField(constructorArg(), (p, c) -> { + XContentBuilder b = XContentBuilder.builder(p.contentType().xContent()); + b.copyCurrentStructure(p); + return BytesReference.bytes(b); + }, new ParseField(CHUNKED_EMBEDDINGS_FIELD), ObjectParser.ValueType.OBJECT_ARRAY); + + MODEL_SETTINGS_PARSER.declareString(ConstructingObjectParser.constructorArg(), new ParseField(TASK_TYPE_FIELD)); + MODEL_SETTINGS_PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), new ParseField(DIMENSIONS_FIELD)); + MODEL_SETTINGS_PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), new ParseField(SIMILARITY_FIELD)); + MODEL_SETTINGS_PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), new ParseField(ELEMENT_TYPE_FIELD)); + } + + /** + * Converts the provided {@link ChunkedInferenceServiceResults} into a list of {@link Chunk}. + */ + public static List toSemanticTextFieldChunks(List results, XContentType contentType) { + List chunks = new ArrayList<>(); + for (var result : results) { + for (Iterator it = result.chunksAsMatchedTextAndByteReference(contentType.xContent()); it + .hasNext();) { + var chunkAsByteReference = it.next(); + chunks.add(new Chunk(chunkAsByteReference.matchedText(), chunkAsByteReference.bytesReference())); + } + } + return chunks; + } + +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldMapper.java new file mode 100644 index 0000000000000..addb616d7638f --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldMapper.java @@ -0,0 +1,817 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.mapper; + +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.search.join.ScoreMode; +import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.fielddata.FieldDataContext; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.mapper.BlockLoader; +import org.elasticsearch.index.mapper.BlockSourceReader; +import org.elasticsearch.index.mapper.DocumentParserContext; +import org.elasticsearch.index.mapper.DocumentParsingException; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.InferenceFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.index.mapper.MapperBuilderContext; +import org.elasticsearch.index.mapper.MapperMergeContext; +import org.elasticsearch.index.mapper.MappingLookup; +import org.elasticsearch.index.mapper.NestedObjectMapper; +import org.elasticsearch.index.mapper.ObjectMapper; +import org.elasticsearch.index.mapper.SourceValueFetcher; +import org.elasticsearch.index.mapper.TextSearchInfo; +import org.elasticsearch.index.mapper.ValueFetcher; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; +import org.elasticsearch.index.query.MatchNoneQueryBuilder; +import org.elasticsearch.index.query.NestedQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.inference.InferenceResults; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentLocation; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentParserConfiguration; +import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; +import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; + +import static org.elasticsearch.search.SearchService.DEFAULT_SIZE; + +/** + * A {@link FieldMapper} for semantic text fields. 
+ */ +public class LegacySemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper { + public static final NodeFeature SEMANTIC_TEXT_SEARCH_INFERENCE_ID = new NodeFeature("semantic_text.search_inference_id"); + public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2"); + public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix"); + public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix"); + public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix"); + public static final NodeFeature SEMANTIC_TEXT_ZERO_SIZE_FIX = new NodeFeature("semantic_text.zero_size_fix"); + public static final NodeFeature SEMANTIC_TEXT_ALWAYS_EMIT_INFERENCE_ID_FIX = new NodeFeature( + "semantic_text.always_emit_inference_id_fix" + ); + + public static final String CONTENT_TYPE = "semantic_text"; + private final IndexSettings indexSettings; + + public static class Builder extends FieldMapper.Builder { + private final IndexVersion indexVersionCreated; + private final IndexSettings indexSettings; + + private final Parameter inferenceId = Parameter.stringParam( + LegacySemanticTextField.INFERENCE_ID_FIELD, + false, + mapper -> ((LegacySemanticTextFieldType) mapper.fieldType()).inferenceId, + SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID + ).addValidator(v -> { + if (Strings.isEmpty(v)) { + throw new IllegalArgumentException( + "[" + + LegacySemanticTextField.INFERENCE_ID_FIELD + + "] on mapper [" + + leafName() + + "] of type [" + + CONTENT_TYPE + + "] must not be empty" + ); + } + }).alwaysSerialize(); + + private final Parameter searchInferenceId = Parameter.stringParam( + LegacySemanticTextField.SEARCH_INFERENCE_ID_FIELD, + true, + mapper -> ((LegacySemanticTextFieldType) mapper.fieldType()).searchInferenceId, + null + ).acceptsNull().addValidator(v -> { + if (v != null && Strings.isEmpty(v)) { + throw new IllegalArgumentException( + "[" + + LegacySemanticTextField.SEARCH_INFERENCE_ID_FIELD + + "] on mapper [" + + leafName() + + "] of type [" + + CONTENT_TYPE + + "] must not be empty" + ); + } + }); + + private final Parameter modelSettings = new Parameter<>( + LegacySemanticTextField.MODEL_SETTINGS_FIELD, + true, + () -> null, + (n, c, o) -> LegacySemanticTextField.parseModelSettingsFromMap(o), + mapper -> ((LegacySemanticTextFieldType) mapper.fieldType()).modelSettings, + XContentBuilder::field, + Objects::toString + ).acceptsNull().setMergeValidator(LegacySemanticTextFieldMapper::canMergeModelSettings); + + private final Parameter> meta = Parameter.metaParam(); + + private Function inferenceFieldBuilder; + + public static Builder from(LegacySemanticTextFieldMapper mapper) { + Builder builder = new Builder( + mapper.leafName(), + mapper.fieldType().indexVersionCreated, + mapper.fieldType().getChunksField().bitsetProducer(), + mapper.indexSettings + ); + builder.init(mapper); + return builder; + } + + public Builder( + String name, + IndexVersion indexVersionCreated, + Function bitSetProducer, + IndexSettings indexSettings + ) { + super(name); + this.indexVersionCreated = indexVersionCreated; + this.indexSettings = indexSettings; + this.inferenceFieldBuilder = c -> createInferenceField( + c, + indexVersionCreated, + modelSettings.get(), + bitSetProducer, + indexSettings + ); + } + + public Builder setInferenceId(String id) { + this.inferenceId.setValue(id); + return 
this; + } + + public Builder setSearchInferenceId(String id) { + this.searchInferenceId.setValue(id); + return this; + } + + public Builder setModelSettings(LegacySemanticTextField.ModelSettings value) { + this.modelSettings.setValue(value); + return this; + } + + @Override + protected Parameter[] getParameters() { + return new Parameter[] { inferenceId, searchInferenceId, modelSettings, meta }; + } + + @Override + protected void merge(FieldMapper mergeWith, Conflicts conflicts, MapperMergeContext mapperMergeContext) { + LegacySemanticTextFieldMapper semanticMergeWith = (LegacySemanticTextFieldMapper) mergeWith; + semanticMergeWith = copySettings(semanticMergeWith, mapperMergeContext); + + super.merge(semanticMergeWith, conflicts, mapperMergeContext); + conflicts.check(); + var context = mapperMergeContext.createChildContext(semanticMergeWith.leafName(), ObjectMapper.Dynamic.FALSE); + var inferenceField = inferenceFieldBuilder.apply(context.getMapperBuilderContext()); + var mergedInferenceField = inferenceField.merge(semanticMergeWith.fieldType().getInferenceField(), context); + inferenceFieldBuilder = c -> mergedInferenceField; + } + + @Override + public LegacySemanticTextFieldMapper build(MapperBuilderContext context) { + if (copyTo.copyToFields().isEmpty() == false) { + throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support [copy_to]"); + } + if (multiFieldsBuilder.hasMultiFields()) { + throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support multi-fields"); + } + final String fullName = context.buildFullName(leafName()); + + if (context.isInNestedContext()) { + throw new IllegalArgumentException(CONTENT_TYPE + " field [" + fullName + "] cannot be nested"); + } + var childContext = context.createChildContext(leafName(), ObjectMapper.Dynamic.FALSE); + final ObjectMapper inferenceField = inferenceFieldBuilder.apply(childContext); + + return new LegacySemanticTextFieldMapper( + leafName(), + new LegacySemanticTextFieldType( + fullName, + inferenceId.getValue(), + searchInferenceId.getValue(), + modelSettings.getValue(), + inferenceField, + indexVersionCreated, + meta.getValue() + ), + builderParams(this, context), + indexSettings + ); + } + + /** + * As necessary, copy settings from this builder to the passed-in mapper. + * Used to preserve {@link SemanticTextField.ModelSettings} when updating a semantic text mapping to one where the model settings + * are not specified. 
+ * + * @param mapper The mapper + * @return A mapper with the copied settings applied + */ + private LegacySemanticTextFieldMapper copySettings(LegacySemanticTextFieldMapper mapper, MapperMergeContext mapperMergeContext) { + LegacySemanticTextFieldMapper returnedMapper = mapper; + if (mapper.fieldType().getModelSettings() == null) { + Builder builder = from(mapper); + builder.setModelSettings(modelSettings.getValue()); + returnedMapper = builder.build(mapperMergeContext.getMapperBuilderContext()); + } + + return returnedMapper; + } + } + + private LegacySemanticTextFieldMapper( + String simpleName, + MappedFieldType mappedFieldType, + BuilderParams builderParams, + IndexSettings indexSettings + ) { + super(simpleName, mappedFieldType, builderParams); + this.indexSettings = indexSettings; + } + + @Override + public Iterator iterator() { + List subIterators = new ArrayList<>(); + subIterators.add(fieldType().getInferenceField()); + return subIterators.iterator(); + } + + @Override + public FieldMapper.Builder getMergeBuilder() { + return Builder.from(this); + } + + @Override + protected void parseCreateField(DocumentParserContext context) throws IOException { + XContentParser parser = context.parser(); + if (parser.currentToken() == XContentParser.Token.VALUE_NULL) { + return; + } + + XContentLocation xContentLocation = parser.getTokenLocation(); + final LegacySemanticTextField field; + boolean isWithinLeaf = context.path().isWithinLeafObject(); + try { + context.path().setWithinLeafObject(true); + field = LegacySemanticTextField.parse(parser, new Tuple<>(fullPath(), context.parser().contentType())); + } finally { + context.path().setWithinLeafObject(isWithinLeaf); + } + + final String fullFieldName = fieldType().name(); + if (field.inference().inferenceId().equals(fieldType().getInferenceId()) == false) { + throw new DocumentParsingException( + xContentLocation, + Strings.format( + "The configured %s [%s] for field [%s] doesn't match the %s [%s] reported in the document.", + LegacySemanticTextField.INFERENCE_ID_FIELD, + field.inference().inferenceId(), + fullFieldName, + LegacySemanticTextField.INFERENCE_ID_FIELD, + fieldType().getInferenceId() + ) + ); + } + + final LegacySemanticTextFieldMapper mapper; + if (fieldType().getModelSettings() == null) { + context.path().remove(); + Builder builder = (Builder) new Builder( + leafName(), + fieldType().indexVersionCreated, + fieldType().getChunksField().bitsetProducer(), + indexSettings + ).init(this); + try { + mapper = builder.setModelSettings(field.inference().modelSettings()) + .setInferenceId(field.inference().inferenceId()) + .build(context.createDynamicMapperBuilderContext()); + context.addDynamicMapper(mapper); + } finally { + context.path().add(leafName()); + } + } else { + Conflicts conflicts = new Conflicts(fullFieldName); + canMergeModelSettings(fieldType().getModelSettings(), field.inference().modelSettings(), conflicts); + try { + conflicts.check(); + } catch (Exception exc) { + throw new DocumentParsingException( + xContentLocation, + "Incompatible model settings for field [" + + fullPath() + + "]. 
Check that the " + + LegacySemanticTextField.INFERENCE_ID_FIELD + + " is not using different model settings", + exc + ); + } + mapper = this; + } + + var chunksField = mapper.fieldType().getChunksField(); + var embeddingsField = mapper.fieldType().getEmbeddingsField(); + for (var chunk : field.inference().chunks()) { + try ( + XContentParser subParser = XContentHelper.createParserNotCompressed( + XContentParserConfiguration.EMPTY, + chunk.rawEmbeddings(), + context.parser().contentType() + ) + ) { + DocumentParserContext subContext = context.createNestedContext(chunksField).switchParser(subParser); + subParser.nextToken(); + embeddingsField.parse(subContext); + } + } + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + @Override + public LegacySemanticTextFieldType fieldType() { + return (LegacySemanticTextFieldType) super.fieldType(); + } + + @Override + public InferenceFieldMetadata getMetadata(Set sourcePaths) { + String[] copyFields = sourcePaths.toArray(String[]::new); + // ensure consistent order + Arrays.sort(copyFields); + return new InferenceFieldMetadata(fullPath(), fieldType().getInferenceId(), fieldType().getSearchInferenceId(), copyFields); + } + + @Override + public Object getOriginalValue(Map sourceAsMap) { + Object fieldValue = sourceAsMap.get(fullPath()); + if (fieldValue == null) { + return null; + } else if (fieldValue instanceof Map == false) { + // Don't try to further validate the non-map value, that will be handled when the source is fully parsed + return fieldValue; + } + + Map fieldValueMap = XContentMapValues.nodeMapValue(fieldValue, "Field [" + fullPath() + "]"); + return XContentMapValues.extractValue(LegacySemanticTextField.TEXT_FIELD, fieldValueMap); + } + + @Override + protected void doValidate(MappingLookup mappers) { + int parentPathIndex = fullPath().lastIndexOf(leafName()); + if (parentPathIndex > 0) { + // Check that the parent object field allows subobjects. + // Subtract one from the parent path index to omit the trailing dot delimiter. 
+ ObjectMapper parentMapper = mappers.objectMappers().get(fullPath().substring(0, parentPathIndex - 1)); + if (parentMapper == null) { + throw new IllegalStateException(CONTENT_TYPE + " field [" + fullPath() + "] does not have a parent object mapper"); + } + + if (parentMapper.subobjects() == ObjectMapper.Subobjects.DISABLED) { + throw new IllegalArgumentException( + CONTENT_TYPE + " field [" + fullPath() + "] cannot be in an object field with subobjects disabled" + ); + } + } + } + + public static class LegacySemanticTextFieldType extends AbstractSemanticTextFieldType { + private final String inferenceId; + private final String searchInferenceId; + private final LegacySemanticTextField.ModelSettings modelSettings; + private final ObjectMapper inferenceField; + private final IndexVersion indexVersionCreated; + + public LegacySemanticTextFieldType( + String name, + String inferenceId, + String searchInferenceId, + LegacySemanticTextField.ModelSettings modelSettings, + ObjectMapper inferenceField, + IndexVersion indexVersionCreated, + Map meta + ) { + super(name, true, false, false, TextSearchInfo.NONE, meta); + this.inferenceId = inferenceId; + this.searchInferenceId = searchInferenceId; + this.modelSettings = modelSettings; + this.inferenceField = inferenceField; + this.indexVersionCreated = indexVersionCreated; + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + public String getInferenceId() { + return inferenceId; + } + + public String getSearchInferenceId() { + return searchInferenceId == null ? inferenceId : searchInferenceId; + } + + public LegacySemanticTextField.ModelSettings getModelSettings() { + return modelSettings; + } + + public ObjectMapper getInferenceField() { + return inferenceField; + } + + public NestedObjectMapper getChunksField() { + return (NestedObjectMapper) inferenceField.getMapper(LegacySemanticTextField.CHUNKS_FIELD); + } + + public FieldMapper getEmbeddingsField() { + return (FieldMapper) getChunksField().getMapper(LegacySemanticTextField.CHUNKED_EMBEDDINGS_FIELD); + } + + @Override + public Query termQuery(Object value, SearchExecutionContext context) { + throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support term query"); + } + + @Override + public Query existsQuery(SearchExecutionContext context) { + if (getEmbeddingsField() == null) { + return new MatchNoDocsQuery(); + } + + return NestedQueryBuilder.toQuery( + (c -> getEmbeddingsField().fieldType().existsQuery(c)), + LegacySemanticTextField.getChunksFieldName(name()), + ScoreMode.None, + false, + context + ); + } + + @Override + public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { + // Redirect the fetcher to load the original values of the field + return SourceValueFetcher.toString(LegacySemanticTextField.getOriginalTextFieldName(name()), context, format); + } + + @Override + public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) { + throw new IllegalArgumentException("[semantic_text] fields do not support sorting, scripting or aggregating"); + } + + @Override + public boolean fieldHasValue(FieldInfos fieldInfos) { + return fieldInfos.fieldInfo(LegacySemanticTextField.getEmbeddingsFieldName(name())) != null; + } + + @Override + public QueryBuilder semanticQuery(InferenceResults inferenceResults, Integer requestSize, float boost, String queryName) { + String nestedFieldPath = LegacySemanticTextField.getChunksFieldName(name()); + String inferenceResultsFieldName = 
LegacySemanticTextField.getEmbeddingsFieldName(name()); + QueryBuilder childQueryBuilder; + + if (modelSettings == null) { + // No inference results have been indexed yet + childQueryBuilder = new MatchNoneQueryBuilder(); + } else { + childQueryBuilder = switch (modelSettings.taskType()) { + case SPARSE_EMBEDDING -> { + if (inferenceResults instanceof TextExpansionResults == false) { + throw new IllegalArgumentException( + generateQueryInferenceResultsTypeMismatchMessage(inferenceResults, TextExpansionResults.NAME) + ); + } + + // TODO: Use WeightedTokensQueryBuilder + TextExpansionResults textExpansionResults = (TextExpansionResults) inferenceResults; + var boolQuery = QueryBuilders.boolQuery(); + for (var weightedToken : textExpansionResults.getWeightedTokens()) { + boolQuery.should( + QueryBuilders.termQuery(inferenceResultsFieldName, weightedToken.token()).boost(weightedToken.weight()) + ); + } + boolQuery.minimumShouldMatch(1); + + yield boolQuery; + } + case TEXT_EMBEDDING -> { + if (inferenceResults instanceof MlTextEmbeddingResults == false) { + throw new IllegalArgumentException( + generateQueryInferenceResultsTypeMismatchMessage(inferenceResults, MlTextEmbeddingResults.NAME) + ); + } + + MlTextEmbeddingResults textEmbeddingResults = (MlTextEmbeddingResults) inferenceResults; + float[] inference = textEmbeddingResults.getInferenceAsFloat(); + if (inference.length != modelSettings.dimensions()) { + throw new IllegalArgumentException( + generateDimensionCountMismatchMessage(inference.length, modelSettings.dimensions()) + ); + } + + Integer k = requestSize; + if (k != null) { + // Ensure that k is at least the default size so that aggregations work when size is set to 0 in the request + k = Math.max(k, DEFAULT_SIZE); + } + + yield new KnnVectorQueryBuilder(inferenceResultsFieldName, inference, k, null, null); + } + default -> throw new IllegalStateException( + "Field [" + + name() + + "] is configured to use an inference endpoint with an unsupported task type [" + + modelSettings.taskType() + + "]" + ); + }; + } + + return new NestedQueryBuilder(nestedFieldPath, childQueryBuilder, ScoreMode.Max).boost(boost).queryName(queryName); + } + + private String generateQueryInferenceResultsTypeMismatchMessage(InferenceResults inferenceResults, String expectedResultsType) { + StringBuilder sb = new StringBuilder( + "Field [" + + name() + + "] expected query inference results to be of type [" + + expectedResultsType + + "]," + + " got [" + + inferenceResults.getWriteableName() + + "]." + ); + + return generateInvalidQueryInferenceResultsMessage(sb); + } + + private String generateDimensionCountMismatchMessage(int inferenceDimCount, int expectedDimCount) { + StringBuilder sb = new StringBuilder( + "Field [" + + name() + + "] expected query inference results with " + + expectedDimCount + + " dimensions, got " + + inferenceDimCount + + " dimensions." + ); + + return generateInvalidQueryInferenceResultsMessage(sb); + } + + private String generateInvalidQueryInferenceResultsMessage(StringBuilder baseMessageBuilder) { + if (searchInferenceId != null && searchInferenceId.equals(inferenceId) == false) { + baseMessageBuilder.append( + " Is the search inference endpoint [" + + searchInferenceId + + "] compatible with the inference endpoint [" + + inferenceId + + "]?" 
+                );
+            } else {
+                baseMessageBuilder.append(" Has the configuration for inference endpoint [" + inferenceId + "] changed?");
+            }
+
+            return baseMessageBuilder.toString();
+        }
+
+        @Override
+        public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) {
+            SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name().concat(".text")));
+            return new BlockSourceReader.BytesRefsBlockLoader(fetcher, BlockSourceReader.lookupMatchingAll());
+        }
+    }
+
+    /**
+     * <p>
+     * Insert or replace the path's value in the map with the provided new value. The map will be modified in-place.
+     * If the complete path does not exist in the map, it will be added to the deepest (sub-)map possible.
+     * </p>
+     * <p>
+     * For example, given the map:
+     * </p>
+     * <pre>
+     * {
+     *   "path1": {
+     *     "path2": {
+     *       "key1": "value1"
+     *     }
+     *   }
+     * }
+     * </pre>
+     * <p>
+     * And the caller wanted to insert {@code "path1.path2.path3.key2": "value2"}, the method would emit the modified map:
+     * </p>
+     * <pre>
+     * {
+     *   "path1": {
+     *     "path2": {
+     *       "key1": "value1",
+     *       "path3.key2": "value2"
+     *     }
+     *   }
+     * }
+     * </pre>
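+     * <p>
+     * An illustrative counter-example (an editorial addition, not part of the original documentation): if
+     * {@code "path1"} instead mapped to a list of two maps, inserting {@code "path1.key2"} would match both
+     * list entries, so the insertion point is ambiguous and the method throws an {@link IllegalArgumentException}
+     * rather than guessing:
+     * </p>
+     * <pre>
+     * {
+     *   "path1": [
+     *     { "key1": "value1" },
+     *     { "key1": "value2" }
+     *   ]
+     * }
+     * </pre>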
+     *
+     * @param path the value's path in the map.
+     * @param map the map to search and modify in-place.
+     * @param newValue the new value to assign to the path.
+     *
+     * @throws IllegalArgumentException If either the path cannot be fully traversed or there is ambiguity about where to insert the new
+     *                                  value.
+     */
+    public static void insertValue(String path, Map<?, ?> map, Object newValue) {
+        String[] pathElements = path.split("\\.");
+        if (pathElements.length == 0) {
+            return;
+        }
+
+        List<SuffixMap> suffixMaps = extractSuffixMaps(pathElements, 0, map);
+        if (suffixMaps.isEmpty()) {
+            // This should never happen. Throw in case it does for some reason.
+            throw new IllegalStateException("extractSuffixMaps returned an empty suffix map list");
+        } else if (suffixMaps.size() == 1) {
+            SuffixMap suffixMap = suffixMaps.getFirst();
+            suffixMap.map().put(suffixMap.suffix(), newValue);
+        } else {
+            throw new IllegalArgumentException(
+                "Path [" + path + "] could be inserted in " + suffixMaps.size() + " distinct ways, it is ambiguous which one to use"
+            );
+        }
+    }
+
+    private record SuffixMap(String suffix, Map<String, Object> map) {}
+
+    private static List<SuffixMap> extractSuffixMaps(String[] pathElements, int index, Object currentValue) {
+        if (currentValue instanceof List<?> valueList) {
+            List<SuffixMap> suffixMaps = new ArrayList<>(valueList.size());
+            for (Object o : valueList) {
+                suffixMaps.addAll(extractSuffixMaps(pathElements, index, o));
+            }
+
+            return suffixMaps;
+        } else if (currentValue instanceof Map<?, ?>) {
+            @SuppressWarnings("unchecked")
+            Map<String, Object> map = (Map<String, Object>) currentValue;
+            List<SuffixMap> suffixMaps = new ArrayList<>(map.size());
+
+            String key = pathElements[index];
+            while (index < pathElements.length) {
+                if (map.containsKey(key)) {
+                    if (index + 1 == pathElements.length) {
+                        // We found the complete path
+                        suffixMaps.add(new SuffixMap(key, map));
+                    } else {
+                        // We've matched that path partially, keep traversing to try to match it fully
+                        suffixMaps.addAll(extractSuffixMaps(pathElements, index + 1, map.get(key)));
+                    }
+                }
+
+                if (++index < pathElements.length) {
+                    key += "." + pathElements[index];
+                }
+            }
+
+            if (suffixMaps.isEmpty()) {
+                // We checked for all remaining elements in the path, and they do not exist. This means we found a leaf map that we
+                // should add the value to.
+                suffixMaps.add(new SuffixMap(key, map));
+            }
+
+            return suffixMaps;
+        } else {
+            throw new IllegalArgumentException(
+                "Path ["
+                    + String.join(".", Arrays.copyOfRange(pathElements, 0, index))
+                    + "] has value ["
+                    + currentValue
+                    + "] of type ["
+                    + currentValue.getClass().getSimpleName()
+                    + "], which cannot be traversed into further"
+            );
+        }
+    }
+
+    private static ObjectMapper createInferenceField(
+        MapperBuilderContext context,
+        IndexVersion indexVersionCreated,
+        @Nullable LegacySemanticTextField.ModelSettings modelSettings,
+        Function<Query, BitSetProducer> bitSetProducer,
+        IndexSettings indexSettings
+    ) {
+        return new ObjectMapper.Builder(LegacySemanticTextField.INFERENCE_FIELD, Optional.of(ObjectMapper.Subobjects.ENABLED)).dynamic(
+            ObjectMapper.Dynamic.FALSE
+        ).add(createChunksField(indexVersionCreated, modelSettings, bitSetProducer, indexSettings)).build(context);
+    }
+
+    private static NestedObjectMapper.Builder createChunksField(
+        IndexVersion indexVersionCreated,
+        @Nullable LegacySemanticTextField.ModelSettings modelSettings,
+        Function<Query, BitSetProducer> bitSetProducer,
+        IndexSettings indexSettings
+    ) {
+        NestedObjectMapper.Builder chunksField = new NestedObjectMapper.Builder(
+            LegacySemanticTextField.CHUNKS_FIELD,
+            indexVersionCreated,
+            bitSetProducer,
+            indexSettings
+        );
+        chunksField.dynamic(ObjectMapper.Dynamic.FALSE);
+        KeywordFieldMapper.Builder chunkTextField = new KeywordFieldMapper.Builder(
+            LegacySemanticTextField.CHUNKED_TEXT_FIELD,
+            indexVersionCreated
+        ).indexed(false).docValues(false);
+        if (modelSettings != null) {
+            chunksField.add(createEmbeddingsField(indexVersionCreated, modelSettings));
+        }
+        chunksField.add(chunkTextField);
+        return chunksField;
+    }
+
+    private static Mapper.Builder createEmbeddingsField(
+        IndexVersion indexVersionCreated,
+        LegacySemanticTextField.ModelSettings modelSettings
+    ) {
+        return switch (modelSettings.taskType()) {
+            case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(LegacySemanticTextField.CHUNKED_EMBEDDINGS_FIELD);
+            case TEXT_EMBEDDING -> {
+                DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder(
+                    LegacySemanticTextField.CHUNKED_EMBEDDINGS_FIELD,
+                    indexVersionCreated
+                );
+
+                SimilarityMeasure similarity = modelSettings.similarity();
+                if (similarity != null) {
+                    switch (similarity) {
+                        case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
+                        case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
+                        case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
+                        default -> throw new IllegalArgumentException(
+                            "Unknown similarity measure in model_settings [" + similarity.name() + "]"
+                        );
+                    }
+                }
+                denseVectorMapperBuilder.dimensions(modelSettings.dimensions());
+                denseVectorMapperBuilder.elementType(modelSettings.elementType());
+
+                yield denseVectorMapperBuilder;
+            }
+            default -> throw new IllegalArgumentException("Invalid task_type in model_settings [" + modelSettings.taskType().name() + "]");
+        };
+    }
+
+    private static boolean canMergeModelSettings(
+        LegacySemanticTextField.ModelSettings previous,
+        LegacySemanticTextField.ModelSettings current,
+        Conflicts conflicts
+    ) {
+        if (Objects.equals(previous, current)) {
+            return true;
+        }
+        if (previous == null) {
+            return true;
+        }
+        conflicts.addConflict("model_settings", "");
+        return false;
+    }
+}
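+
+// Illustrative usage sketch (an editorial addition, not part of this change): how insertValue behaves
+// for the nested-path example in its javadoc. The variable names are hypothetical, and the maps must be
+// mutable because the deepest matching map is modified in place:
+//
+//     Map<String, Object> inner = new HashMap<>(Map.of("key1", "value1"));
+//     Map<String, Object> doc = new HashMap<>(Map.of("path1", new HashMap<>(Map.of("path2", inner))));
+//     LegacySemanticTextFieldMapper.insertValue("path1.path2.path3.key2", doc, "value2");
+//     // inner now contains {key1=value1, path3.key2=value2}
diff --git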
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetField.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetField.java new file mode 100644 index 0000000000000..3449a21b51104 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetField.java @@ -0,0 +1,94 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.mapper;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.IndexOptions;
+
+/**
+ * A {@link Field} that indexes its string value as a single untokenized term whose
+ * {@link OffsetAttribute} carries the provided start and end offsets.
+ */
+public final class OffsetField extends Field {
+
+    private static final FieldType FIELD_TYPE = new FieldType();
+
+    static {
+        FIELD_TYPE.setTokenized(false);
+        FIELD_TYPE.setOmitNorms(true);
+        FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    }
+
+    private int startOffset;
+    private int endOffset;
+
+    public OffsetField(String fieldName, String sourceFieldName, int startOffset, int endOffset) {
+        super(fieldName, sourceFieldName, FIELD_TYPE);
+        this.startOffset = startOffset;
+        this.endOffset = endOffset;
+    }
+
+    public void setOffsets(int startOffset, int endOffset) {
+        this.startOffset = startOffset;
+        this.endOffset = endOffset;
+    }
+
+    @Override
+    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+        OffsetTokenStream stream;
+        if (reuse instanceof OffsetTokenStream) {
+            stream = (OffsetTokenStream) reuse;
+        } else {
+            stream = new OffsetTokenStream();
+        }
+
+        stream.setValues((String) fieldsData, startOffset, endOffset);
+        return stream;
+    }
+
+    /** A token stream that emits a single token carrying the configured offsets. */
+    private static final class OffsetTokenStream extends TokenStream {
+        private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+        private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
+        // Starts as "used" so that no token is emitted before reset() is called.
+        private boolean used = true;
+        private String value = null;
+        private int startOffset = 0;
+        private int endOffset = 0;
+
+        private OffsetTokenStream() {}
+
+        /** Sets the values */
+        void setValues(String value, int startOffset, int endOffset) {
+            this.value = value;
+            this.startOffset = startOffset;
+            this.endOffset = endOffset;
+        }
+
+        @Override
+        public boolean incrementToken() {
+            if (used) {
+                return false;
+            }
+            clearAttributes();
+            termAttribute.append(value);
+            offsetAttribute.setOffset(startOffset, endOffset);
+            used = true;
+            return true;
+        }
+
+        @Override
+        public void reset() {
+            used = false;
+        }
+
+        @Override
+        public void close() {
+            value = null;
+        }
+    }
+}
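+
+// Illustrative sketch (an editorial addition, not part of this change): what indexing an OffsetField
+// looks like from the token-stream side. The field value and offsets below are hypothetical; the
+// offsets end up in the postings of the shared "_offset_source" field rather than in stored fields
+// or doc values:
+//
+//     OffsetField field = new OffsetField("_offset_source", "my_field.content", 0, 42);
+//     TokenStream ts = field.tokenStream(null, null);
+//     ts.reset();
+//     ts.incrementToken();                          // emits the single term "my_field.content"
+//     OffsetAttribute offsets = ts.getAttribute(OffsetAttribute.class);
+//     // offsets.startOffset() == 0, offsets.endOffset() == 42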
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetSourceFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetSourceFieldMapper.java new file mode 100644 index 0000000000000..fd309233e6aec --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetSourceFieldMapper.java @@ -0,0 +1,244 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.mapper;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.elasticsearch.common.xcontent.XContentParserUtils;
+import org.elasticsearch.index.fielddata.FieldDataContext;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.mapper.DocumentParserContext;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.MapperBuilderContext;
+import org.elasticsearch.index.mapper.TextSearchInfo;
+import org.elasticsearch.index.mapper.ValueFetcher;
+import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.search.fetch.StoredFieldsSpec;
+import org.elasticsearch.search.lookup.Source;
+import org.elasticsearch.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+public class OffsetSourceFieldMapper extends FieldMapper {
+    public static final String CONTENT_TYPE = "offset_source";
+    public static final String NAME = "_offset_source";
+
+    private static final String SOURCE_NAME_FIELD = "field";
+    private static final String START_OFFSET_FIELD = "start";
+    private static final String END_OFFSET_FIELD = "end";
+
+    public static class Builder extends FieldMapper.Builder {
+        private final Parameter<Map<String, String>> meta = Parameter.metaParam();
+
+        public Builder(String name) {
+            super(name);
+        }
+
+        @Override
+        protected Parameter<?>[] getParameters() {
+            return new Parameter<?>[] { meta };
+        }
+
+        @Override
+        public OffsetSourceFieldMapper build(MapperBuilderContext context) {
+            return new OffsetSourceFieldMapper(
+                leafName(),
+                new OffsetSourceFieldType(context.buildFullName(leafName()), meta.getValue()),
+                builderParams(this, context)
+            );
+        }
+    }
+
+    public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n));
+
+    public static final class OffsetSourceFieldType extends MappedFieldType {
+        public OffsetSourceFieldType(String name, Map<String, String> meta) {
+            super(name, true, false, false, TextSearchInfo.NONE, meta);
+        }
+
+        @Override
+        public String typeName() {
+            return CONTENT_TYPE;
+        }
+
+        @Override
+        public Query existsQuery(SearchExecutionContext context) {
+            // Offsets are indexed in the shared NAME field with terms of the form "<field path>.<source field>",
+            // so existence is a prefix match on this field's path (an exact term query would never match).
+            return new PrefixQuery(new Term(NAME, name() + "."));
+        }
+
+        @Override
+        public boolean fieldHasValue(FieldInfos fieldInfos) {
+            return fieldInfos.fieldInfo(NAME) != null;
+        }
+
+        @Override
+        public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
+            throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields do not support sorting, scripting or aggregating");
+        }
+
+        @Override
+        public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
+            return new OffsetSourceValueFetcher(name());
+        }
+
+        @Override
+        public Query termQuery(Object value, SearchExecutionContext context) {
+            throw new IllegalArgumentException("Queries on [" + CONTENT_TYPE + "] fields are not supported");
+        }
+    }
+
+    /**
+     * @param simpleName the leaf name of the mapper
+     * @param mappedFieldType the {@link OffsetSourceFieldType} associated with this mapper
+     * @param params initialization params for this field mapper
+     */
+    protected OffsetSourceFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams params) {
+        super(simpleName, mappedFieldType, params);
+    }
+
+    @Override
+    protected String contentType() {
+        return CONTENT_TYPE;
+    }
+
+    @Override
+    protected boolean supportsParsingObject() {
+        return true;
+    }
+
+    @Override
+    protected void parseCreateField(DocumentParserContext context) throws IOException {
+        XContentParser parser = context.parser();
+        XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);
+        String fieldName = null;
+        String sourceFieldName = null;
+        int startOffset = -1;
+        int endOffset = -1;
+        while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
+            if (parser.currentToken() == XContentParser.Token.FIELD_NAME) {
+                fieldName = parser.currentName();
+            } else if (SOURCE_NAME_FIELD.equals(fieldName)) {
+                sourceFieldName = parser.text();
+            } else if (START_OFFSET_FIELD.equals(fieldName)) {
+                startOffset = parser.intValue();
+            } else if (END_OFFSET_FIELD.equals(fieldName)) {
+                endOffset = parser.intValue();
+            } else {
+                throw new IllegalArgumentException("Unknown field name [" + fieldName + "]");
+            }
+        }
+        context.doc().addWithKey(fullPath(), new OffsetField(NAME, fullPath() + "." + sourceFieldName, startOffset, endOffset));
+    }
+
+    @Override
+    public FieldMapper.Builder getMergeBuilder() {
+        return new Builder(leafName()).init(this);
+    }
+
+    public static class OffsetsReader {
+        private final String fieldName;
+        private final Map<String, PostingsEnum> postingsEnums = new LinkedHashMap<>();
+        private String sourceFieldName;
+        private int startOffset;
+        private int endOffset;
+
+        public OffsetsReader(Terms terms, String fieldName) throws IOException {
+            this.fieldName = fieldName;
+            // Collect one postings enum per "<field path>.<source field>" term under this field's prefix.
+            Automaton prefixAutomaton = PrefixQuery.toAutomaton(new BytesRef(fieldName + "."));
+            var termsEnum = terms.intersect(new CompiledAutomaton(prefixAutomaton, false, true, false), null);
+            while (termsEnum.next() != null) {
+                var postings = termsEnum.postings(null, PostingsEnum.OFFSETS);
+                if (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+                    String sourceField = termsEnum.term().utf8ToString().substring(fieldName.length() + 1);
+                    postingsEnums.put(sourceField, postings);
+                }
+            }
+        }
+
+        public boolean advanceTo(int doc) throws IOException {
+            for (var it = postingsEnums.entrySet().iterator(); it.hasNext();) {
+                var entry = it.next();
+                var postings = entry.getValue();
+                if (postings.docID() < doc) {
+                    if (postings.advance(doc) == DocIdSetIterator.NO_MORE_DOCS) {
+                        // This source field has no more documents with offsets; drop it.
+                        it.remove();
+                        continue;
+                    }
+                }
+                if (postings.docID() == doc) {
+                    assert postings.freq() == 1;
+                    postings.nextPosition();
+                    sourceFieldName = entry.getKey();
+                    startOffset = postings.startOffset();
+                    endOffset = postings.endOffset();
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        public String getSourceFieldName() {
+            return sourceFieldName;
+        }
+
+        public int getStartOffset() {
+            return startOffset;
+        }
+
+        public int getEndOffset() {
+            return endOffset;
+        }
+    }
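+
+    // Illustrative sketch (an editorial addition, not part of this change): how the reader above can be
+    // used to recover the chunk boundaries for a document. The leaf reader, document id, and field name
+    // are hypothetical:
+    //
+    //     Terms terms = leafReader.terms(OffsetSourceMetaFieldMapper.NAME);
+    //     if (terms != null) {
+    //         OffsetsReader offsets = new OffsetsReader(terms, "my_field.chunks.offset");
+    //         if (offsets.advanceTo(docId)) {
+    //             String sourceField = offsets.getSourceFieldName();
+    //             int start = offsets.getStartOffset();
+    //             int end = offsets.getEndOffset();
+    //         }
+    //     }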
fieldName; + } + + @Override + public void setNextReader(LeafReaderContext context) { + try { + var terms = context.reader().terms(OffsetSourceMetaFieldMapper.NAME); + reader = terms != null ? new OffsetsReader(terms, fieldName) : null; + } catch (IOException exc) { + throw new UncheckedIOException(exc); + } + } + + @Override + public List fetchValues(Source source, int doc, List ignoredValues) throws IOException { + if (reader != null && reader.advanceTo(doc)) { + return List.of(Map.of("field", reader.sourceFieldName, "start", reader.startOffset, "end", reader.endOffset)); + } + return null; + } + + @Override + public StoredFieldsSpec storedFieldsSpec() { + return null; + } + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetSourceMetaFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetSourceMetaFieldMapper.java new file mode 100644 index 0000000000000..081aaebc7d82e --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/OffsetSourceMetaFieldMapper.java @@ -0,0 +1,78 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.xpack.inference.mapper; + +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.search.Query; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.MetadataFieldMapper; +import org.elasticsearch.index.mapper.TextSearchInfo; +import org.elasticsearch.index.mapper.ValueFetcher; +import org.elasticsearch.index.query.SearchExecutionContext; + +import java.util.Collections; + +/** + * This meta field only exists because offset source fields index everything into a + * common _offset_source field and Elasticsearch has a custom codec that complains + * when fields exist in the index and not in mappings. 
+ */ +public class OffsetSourceMetaFieldMapper extends MetadataFieldMapper { + + public static final String NAME = "_offset_source"; + + public static final String CONTENT_TYPE = "_offset_source"; + + public static final TypeParser PARSER = new FixedTypeParser(c -> new OffsetSourceMetaFieldMapper()); + + public static final class OffsetSourceMetaFieldType extends MappedFieldType { + + public static final OffsetSourceMetaFieldType INSTANCE = new OffsetSourceMetaFieldType(); + + // made visible for tests + OffsetSourceMetaFieldType() { + super(NAME, false, false, false, TextSearchInfo.NONE, Collections.emptyMap()); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { + throw new UnsupportedOperationException("Cannot fetch values for internal field [" + typeName() + "]."); + } + + @Override + public Query existsQuery(SearchExecutionContext context) { + throw new UnsupportedOperationException("Cannot run exists query on [_offset_source]"); + } + + @Override + public boolean fieldHasValue(FieldInfos fieldInfos) { + return fieldInfos.fieldInfo(NAME) != null; + } + + @Override + public Query termQuery(Object value, SearchExecutionContext context) { + throw new UnsupportedOperationException("The [_offset_source] field may not be queried directly"); + } + } + + private OffsetSourceMetaFieldMapper() { + super(OffsetSourceMetaFieldType.INSTANCE); + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java index e60e95b58770f..b36390cf8ef74 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java @@ -8,6 +8,8 @@ package org.elasticsearch.xpack.inference.mapper; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.support.XContentMapValues; @@ -17,6 +19,7 @@ import org.elasticsearch.inference.Model; import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.inference.TaskType; +import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.DeprecationHandler; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -31,6 +34,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -38,39 +42,49 @@ import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING; import static org.elasticsearch.inference.TaskType.TEXT_EMBEDDING; +import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR; import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; -import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; /** * A {@link ToXContentObject} that is used to represent the transformation of the semantic text field's inputs. 
- * The resulting object preserves the original input under the {@link SemanticTextField#TEXT_FIELD} and exposes - * the inference results under the {@link SemanticTextField#INFERENCE_FIELD}. * * @param fieldName The original field name. - * @param originalValues The original values associated with the field name. - * @param inference The inference result. * @param contentType The {@link XContentType} used to store the embeddings chunks. */ -public record SemanticTextField(String fieldName, List originalValues, InferenceResult inference, XContentType contentType) - implements - ToXContentObject { +public record SemanticTextField( + String fieldName, + String inferenceId, + ModelSettings modelSettings, + List chunks, + XContentType contentType +) implements ToXContentObject { - static final String TEXT_FIELD = "text"; - static final String INFERENCE_FIELD = "inference"; static final String INFERENCE_ID_FIELD = "inference_id"; static final String SEARCH_INFERENCE_ID_FIELD = "search_inference_id"; static final String CHUNKS_FIELD = "chunks"; static final String CHUNKED_EMBEDDINGS_FIELD = "embeddings"; - static final String CHUNKED_TEXT_FIELD = "text"; + static final String CHUNKED_OFFSET_FIELD = "offset"; + static final String CHUNKED_OFFSET_SOURCE_FIELD = "field"; + static final String CHUNKED_OFFSET_START_FIELD = "start"; + static final String CHUNKED_OFFSET_END_FIELD = "end"; static final String MODEL_SETTINGS_FIELD = "model_settings"; static final String TASK_TYPE_FIELD = "task_type"; static final String DIMENSIONS_FIELD = "dimensions"; static final String SIMILARITY_FIELD = "similarity"; static final String ELEMENT_TYPE_FIELD = "element_type"; - public record InferenceResult(String inferenceId, ModelSettings modelSettings, List chunks) {} + public record Chunk(Offset offset, BytesReference rawEmbeddings) {} - public record Chunk(String text, BytesReference rawEmbeddings) {} + public record Offset(String sourceFieldName, int startOffset, int endOffset) implements ToXContentObject { + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(CHUNKED_OFFSET_SOURCE_FIELD, sourceFieldName()); + builder.field(CHUNKED_OFFSET_START_FIELD, startOffset()); + builder.field(CHUNKED_OFFSET_END_FIELD, endOffset()); + return builder.endObject(); + } + } public record ModelSettings( TaskType taskType, @@ -172,16 +186,8 @@ private void validateFieldNotPresent(String field, Object fieldValue) { } } - public static String getOriginalTextFieldName(String fieldName) { - return fieldName + "." + TEXT_FIELD; - } - - public static String getInferenceFieldName(String fieldName) { - return fieldName + "." + INFERENCE_FIELD; - } - public static String getChunksFieldName(String fieldName) { - return getInferenceFieldName(fieldName) + "." + CHUNKS_FIELD; + return fieldName + "." + CHUNKS_FIELD; } public static String getEmbeddingsFieldName(String fieldName) { @@ -217,16 +223,13 @@ static ModelSettings parseModelSettingsFromMap(Object node) { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - if (originalValues.isEmpty() == false) { - builder.field(TEXT_FIELD, originalValues.size() == 1 ? 
originalValues.get(0) : originalValues); - } - builder.startObject(INFERENCE_FIELD); - builder.field(INFERENCE_ID_FIELD, inference.inferenceId); - builder.field(MODEL_SETTINGS_FIELD, inference.modelSettings); + builder.field(INFERENCE_ID_FIELD, inferenceId); + builder.field(MODEL_SETTINGS_FIELD, modelSettings); builder.startArray(CHUNKS_FIELD); - for (var chunk : inference.chunks) { + for (var chunk : chunks) { builder.startObject(); - builder.field(CHUNKED_TEXT_FIELD, chunk.text); + builder.field(CHUNKED_OFFSET_FIELD); + chunk.offset.toXContent(builder, params); XContentParser parser = XContentHelper.createParserNotCompressed( XContentParserConfiguration.EMPTY, chunk.rawEmbeddings, @@ -237,7 +240,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } builder.endArray(); builder.endObject(); - builder.endObject(); return builder; } @@ -248,25 +250,23 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws true, (args, context) -> new SemanticTextField( context.v1(), - (List) (args[0] == null ? List.of() : args[0]), - (InferenceResult) args[1], + (String) args[0], + (ModelSettings) args[1], + (List) args[2], context.v2() ) ); - @SuppressWarnings("unchecked") - private static final ConstructingObjectParser INFERENCE_RESULT_PARSER = new ConstructingObjectParser<>( - INFERENCE_FIELD, - true, - args -> new InferenceResult((String) args[0], (ModelSettings) args[1], (List) args[2]) - ); - private static final ConstructingObjectParser CHUNKS_PARSER = new ConstructingObjectParser<>( CHUNKS_FIELD, true, - args -> new Chunk((String) args[0], (BytesReference) args[1]) + args -> new Chunk((Offset) args[0], (BytesReference) args[1]) + ); + private static final ConstructingObjectParser OFFSET_PARSER = new ConstructingObjectParser<>( + CHUNKED_OFFSET_FIELD, + true, + args -> new Offset((String) args[0], (int) args[1], (int) args[2]) ); - private static final ConstructingObjectParser MODEL_SETTINGS_PARSER = new ConstructingObjectParser<>( MODEL_SETTINGS_FIELD, true, @@ -280,26 +280,35 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return new ModelSettings(taskType, dimensions, similarity, elementType); } ); - static { - SEMANTIC_TEXT_FIELD_PARSER.declareStringArray(optionalConstructorArg(), new ParseField(TEXT_FIELD)); + SEMANTIC_TEXT_FIELD_PARSER.declareString(constructorArg(), new ParseField(INFERENCE_ID_FIELD)); SEMANTIC_TEXT_FIELD_PARSER.declareObject( constructorArg(), - (p, c) -> INFERENCE_RESULT_PARSER.parse(p, null), - new ParseField(INFERENCE_FIELD) + (p, c) -> MODEL_SETTINGS_PARSER.parse(p, null), + new ParseField(MODEL_SETTINGS_FIELD) + ); + SEMANTIC_TEXT_FIELD_PARSER.declareObjectArray( + constructorArg(), + (p, c) -> CHUNKS_PARSER.parse(p, null), + new ParseField(CHUNKS_FIELD) ); - INFERENCE_RESULT_PARSER.declareString(constructorArg(), new ParseField(INFERENCE_ID_FIELD)); - INFERENCE_RESULT_PARSER.declareObject(constructorArg(), MODEL_SETTINGS_PARSER, new ParseField(MODEL_SETTINGS_FIELD)); - INFERENCE_RESULT_PARSER.declareObjectArray(constructorArg(), CHUNKS_PARSER, new ParseField(CHUNKS_FIELD)); - - CHUNKS_PARSER.declareString(constructorArg(), new ParseField(CHUNKED_TEXT_FIELD)); + CHUNKS_PARSER.declareField( + constructorArg(), + (p, c) -> OFFSET_PARSER.parse(p, null), + new ParseField(CHUNKED_OFFSET_FIELD), + ObjectParser.ValueType.OBJECT + ); CHUNKS_PARSER.declareField(constructorArg(), (p, c) -> { XContentBuilder b = XContentBuilder.builder(p.contentType().xContent()); 
b.copyCurrentStructure(p); return BytesReference.bytes(b); }, new ParseField(CHUNKED_EMBEDDINGS_FIELD), ObjectParser.ValueType.OBJECT_ARRAY); + OFFSET_PARSER.declareString(constructorArg(), new ParseField(CHUNKED_OFFSET_SOURCE_FIELD)); + OFFSET_PARSER.declareInt(constructorArg(), new ParseField(CHUNKED_OFFSET_START_FIELD)); + OFFSET_PARSER.declareInt(constructorArg(), new ParseField(CHUNKED_OFFSET_END_FIELD)); + MODEL_SETTINGS_PARSER.declareString(ConstructingObjectParser.constructorArg(), new ParseField(TASK_TYPE_FIELD)); MODEL_SETTINGS_PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), new ParseField(DIMENSIONS_FIELD)); MODEL_SETTINGS_PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), new ParseField(SIMILARITY_FIELD)); @@ -309,16 +318,46 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws /** * Converts the provided {@link ChunkedInferenceServiceResults} into a list of {@link Chunk}. */ - public static List toSemanticTextFieldChunks(List results, XContentType contentType) { + public static List toSemanticTextFieldChunks( + String sourceFieldName, + String input, + List results, + XContentType contentType + ) { List chunks = new ArrayList<>(); for (var result : results) { for (Iterator it = result.chunksAsMatchedTextAndByteReference(contentType.xContent()); it .hasNext();) { var chunkAsByteReference = it.next(); - chunks.add(new Chunk(chunkAsByteReference.matchedText(), chunkAsByteReference.bytesReference())); + int startOffset = input.indexOf(chunkAsByteReference.matchedText()); + chunks.add( + new Chunk( + new Offset(sourceFieldName, startOffset, startOffset + chunkAsByteReference.matchedText().length()), + chunkAsByteReference.bytesReference() + ) + ); } } return chunks; } + /** + * This method converts the given {@code valueObj} into a list of strings. + * If {@code valueObj} is not a string or a collection of strings, it throws an ElasticsearchStatusException. 
+ */ + public static String nodeStringValues(String field, Object valueObj) { + if (valueObj instanceof Number || valueObj instanceof Boolean) { + return valueObj.toString(); + } else if (valueObj instanceof String value) { + return value; + } else if (valueObj instanceof Collection values) { + return Strings.collectionToDelimitedString(values, String.valueOf(MULTIVAL_SEP_CHAR)); + } + throw new ElasticsearchStatusException( + "Invalid format for field [{}], expected [String] got [{}]", + RestStatus.BAD_REQUEST, + field, + valueObj.getClass().getSimpleName() + ); + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 4a2c5e43e6308..56ef3369ea352 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -14,11 +14,11 @@ import org.apache.lucene.search.join.ScoreMode; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Tuple; -import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; @@ -30,7 +30,6 @@ import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.InferenceFieldMapper; -import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperBuilderContext; @@ -38,7 +37,6 @@ import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.mapper.NestedObjectMapper; import org.elasticsearch.index.mapper.ObjectMapper; -import org.elasticsearch.index.mapper.SimpleMappedFieldType; import org.elasticsearch.index.mapper.SourceValueFetcher; import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.mapper.ValueFetcher; @@ -47,7 +45,6 @@ import org.elasticsearch.index.query.MatchNoneQueryBuilder; import org.elasticsearch.index.query.NestedQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.inference.InferenceResults; import org.elasticsearch.inference.SimilarityMeasure; @@ -58,6 +55,7 @@ import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; +import org.elasticsearch.xpack.inference.queries.SparseVectorQueryBuilder; import java.io.IOException; import java.util.ArrayList; @@ -66,80 +64,71 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Optional; import java.util.Set; import java.util.function.Function; import static org.elasticsearch.search.SearchService.DEFAULT_SIZE; -import static 
org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_TEXT_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKS_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.INFERENCE_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.INFERENCE_ID_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.MODEL_SETTINGS_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.SEARCH_INFERENCE_ID_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.TEXT_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getChunksFieldName; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getEmbeddingsFieldName; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getOriginalTextFieldName; import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.DEFAULT_ELSER_ID; /** * A {@link FieldMapper} for semantic text fields. */ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper { - public static final NodeFeature SEMANTIC_TEXT_SEARCH_INFERENCE_ID = new NodeFeature("semantic_text.search_inference_id"); - public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2"); - public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix"); - public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix"); - public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix"); - public static final NodeFeature SEMANTIC_TEXT_ZERO_SIZE_FIX = new NodeFeature("semantic_text.zero_size_fix"); - public static final NodeFeature SEMANTIC_TEXT_ALWAYS_EMIT_INFERENCE_ID_FIX = new NodeFeature( - "semantic_text.always_emit_inference_id_fix" - ); - public static final String CONTENT_TYPE = "semantic_text"; public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID; private final IndexSettings indexSettings; - public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, c.indexVersionCreated(), c::bitSetProducer, c.getIndexSettings()), - List.of(notInMultiFields(CONTENT_TYPE), notFromDynamicTemplates(CONTENT_TYPE)) - ); + public static final TypeParser PARSER = new TypeParser((n, c) -> { + if (c.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.INFERENCE_METADATA_FIELDS)) { + return new Builder(n, c::bitSetProducer, c.getIndexSettings()); + } + return new LegacySemanticTextFieldMapper.Builder(n, c.indexVersionCreated(), c::bitSetProducer, c.getIndexSettings()); + }, List.of(notInMultiFields(CONTENT_TYPE), notFromDynamicTemplates(CONTENT_TYPE))); public static class Builder extends FieldMapper.Builder { - private final IndexVersion indexVersionCreated; private final IndexSettings indexSettings; private final Parameter inferenceId = Parameter.stringParam( - INFERENCE_ID_FIELD, + SemanticTextField.INFERENCE_ID_FIELD, false, mapper -> ((SemanticTextFieldType) mapper.fieldType()).inferenceId, DEFAULT_ELSER_2_INFERENCE_ID ).addValidator(v -> { if (Strings.isEmpty(v)) { throw new IllegalArgumentException( - "[" + INFERENCE_ID_FIELD + "] on mapper [" + leafName() + "] of 
type [" + CONTENT_TYPE + "] must not be empty" + "[" + + SemanticTextField.INFERENCE_ID_FIELD + + "] on mapper [" + + leafName() + + "] of type [" + + CONTENT_TYPE + + "] must not be empty" ); } }).alwaysSerialize(); private final Parameter searchInferenceId = Parameter.stringParam( - SEARCH_INFERENCE_ID_FIELD, + SemanticTextField.SEARCH_INFERENCE_ID_FIELD, true, mapper -> ((SemanticTextFieldType) mapper.fieldType()).searchInferenceId, null ).acceptsNull().addValidator(v -> { if (v != null && Strings.isEmpty(v)) { throw new IllegalArgumentException( - "[" + SEARCH_INFERENCE_ID_FIELD + "] on mapper [" + leafName() + "] of type [" + CONTENT_TYPE + "] must not be empty" + "[" + + SemanticTextField.SEARCH_INFERENCE_ID_FIELD + + "] on mapper [" + + leafName() + + "] of type [" + + CONTENT_TYPE + + "] must not be empty" ); } }); private final Parameter modelSettings = new Parameter<>( - MODEL_SETTINGS_FIELD, + SemanticTextField.MODEL_SETTINGS_FIELD, true, () -> null, (n, c, o) -> SemanticTextField.parseModelSettingsFromMap(o), @@ -150,35 +139,18 @@ public static class Builder extends FieldMapper.Builder { private final Parameter> meta = Parameter.metaParam(); - private Function inferenceFieldBuilder; + private Function chunksFieldBuilder; public static Builder from(SemanticTextFieldMapper mapper) { - Builder builder = new Builder( - mapper.leafName(), - mapper.fieldType().indexVersionCreated, - mapper.fieldType().getChunksField().bitsetProducer(), - mapper.indexSettings - ); + Builder builder = new Builder(mapper.leafName(), mapper.fieldType().getChunksField().bitsetProducer(), mapper.indexSettings); builder.init(mapper); return builder; } - public Builder( - String name, - IndexVersion indexVersionCreated, - Function bitSetProducer, - IndexSettings indexSettings - ) { + public Builder(String name, Function bitSetProducer, IndexSettings indexSettings) { super(name); - this.indexVersionCreated = indexVersionCreated; this.indexSettings = indexSettings; - this.inferenceFieldBuilder = c -> createInferenceField( - c, - indexVersionCreated, - modelSettings.get(), - bitSetProducer, - indexSettings - ); + this.chunksFieldBuilder = c -> createChunksField(modelSettings.get(), bitSetProducer, indexSettings).build(c); } public Builder setInferenceId(String id) { @@ -209,9 +181,9 @@ protected void merge(FieldMapper mergeWith, Conflicts conflicts, MapperMergeCont super.merge(semanticMergeWith, conflicts, mapperMergeContext); conflicts.check(); var context = mapperMergeContext.createChildContext(semanticMergeWith.leafName(), ObjectMapper.Dynamic.FALSE); - var inferenceField = inferenceFieldBuilder.apply(context.getMapperBuilderContext()); - var mergedInferenceField = inferenceField.merge(semanticMergeWith.fieldType().getInferenceField(), context); - inferenceFieldBuilder = c -> mergedInferenceField; + var inferenceField = chunksFieldBuilder.apply(context.getMapperBuilderContext()); + var mergedInferenceField = inferenceField.merge(semanticMergeWith.fieldType().getChunksField(), context); + chunksFieldBuilder = c -> (NestedObjectMapper) mergedInferenceField; } @Override @@ -228,7 +200,7 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) { throw new IllegalArgumentException(CONTENT_TYPE + " field [" + fullName + "] cannot be nested"); } var childContext = context.createChildContext(leafName(), ObjectMapper.Dynamic.FALSE); - final ObjectMapper inferenceField = inferenceFieldBuilder.apply(childContext); + final NestedObjectMapper chunksField = chunksFieldBuilder.apply(childContext); return new 
SemanticTextFieldMapper( leafName(), @@ -237,8 +209,7 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) { inferenceId.getValue(), searchInferenceId.getValue(), modelSettings.getValue(), - inferenceField, - indexVersionCreated, + chunksField, meta.getValue() ), builderParams(this, context), @@ -279,7 +250,7 @@ private SemanticTextFieldMapper( @Override public Iterator iterator() { List subIterators = new ArrayList<>(); - subIterators.add(fieldType().getInferenceField()); + subIterators.add(fieldType().getChunksField()); return subIterators.iterator(); } @@ -311,15 +282,15 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio } final String fullFieldName = fieldType().name(); - if (field.inference().inferenceId().equals(fieldType().getInferenceId()) == false) { + if (field.inferenceId().equals(fieldType().getInferenceId()) == false) { throw new DocumentParsingException( xContentLocation, Strings.format( "The configured %s [%s] for field [%s] doesn't match the %s [%s] reported in the document.", - INFERENCE_ID_FIELD, - field.inference().inferenceId(), + SemanticTextField.INFERENCE_ID_FIELD, + field.inferenceId(), fullFieldName, - INFERENCE_ID_FIELD, + SemanticTextField.INFERENCE_ID_FIELD, fieldType().getInferenceId() ) ); @@ -328,15 +299,10 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio final SemanticTextFieldMapper mapper; if (fieldType().getModelSettings() == null) { context.path().remove(); - Builder builder = (Builder) new Builder( - leafName(), - fieldType().indexVersionCreated, - fieldType().getChunksField().bitsetProducer(), - indexSettings - ).init(this); + Builder builder = (Builder) new Builder(leafName(), fieldType().getChunksField().bitsetProducer(), indexSettings).init(this); try { - mapper = builder.setModelSettings(field.inference().modelSettings()) - .setInferenceId(field.inference().inferenceId()) + mapper = builder.setModelSettings(field.modelSettings()) + .setInferenceId(field.inferenceId()) .build(context.createDynamicMapperBuilderContext()); context.addDynamicMapper(mapper); } finally { @@ -344,7 +310,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio } } else { Conflicts conflicts = new Conflicts(fullFieldName); - canMergeModelSettings(fieldType().getModelSettings(), field.inference().modelSettings(), conflicts); + canMergeModelSettings(fieldType().getModelSettings(), field.modelSettings(), conflicts); try { conflicts.check(); } catch (Exception exc) { @@ -353,7 +319,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio "Incompatible model settings for field [" + fullPath() + "]. 
Check that the " - + INFERENCE_ID_FIELD + + SemanticTextField.INFERENCE_ID_FIELD + " is not using different model settings", exc ); @@ -363,7 +329,9 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio var chunksField = mapper.fieldType().getChunksField(); var embeddingsField = mapper.fieldType().getEmbeddingsField(); - for (var chunk : field.inference().chunks()) { + var offsetsField = mapper.fieldType().getOffsetsField(); + for (var chunk : field.chunks()) { + var nestedContext = context.createNestedContext(chunksField); try ( XContentParser subParser = XContentHelper.createParserNotCompressed( XContentParserConfiguration.EMPTY, @@ -371,10 +339,22 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.parser().contentType() ) ) { - DocumentParserContext subContext = context.createNestedContext(chunksField).switchParser(subParser); + DocumentParserContext subContext = nestedContext.switchParser(subParser); subParser.nextToken(); embeddingsField.parse(subContext); } + + try ( + XContentParser subParser = XContentHelper.createParserNotCompressed( + XContentParserConfiguration.EMPTY, + new BytesArray(Strings.toString(chunk.offset())), + context.parser().contentType() + ) + ) { + DocumentParserContext subContext = nestedContext.switchParser(subParser); + subParser.nextToken(); + offsetsField.parse(subContext); + } } } @@ -398,16 +378,8 @@ public InferenceFieldMetadata getMetadata(Set sourcePaths) { @Override public Object getOriginalValue(Map sourceAsMap) { - Object fieldValue = sourceAsMap.get(fullPath()); - if (fieldValue == null) { - return null; - } else if (fieldValue instanceof Map == false) { - // Don't try to further validate the non-map value, that will be handled when the source is fully parsed - return fieldValue; - } - - Map fieldValueMap = XContentMapValues.nodeMapValue(fieldValue, "Field [" + fullPath() + "]"); - return XContentMapValues.extractValue(TEXT_FIELD, fieldValueMap); + Object ret = XContentMapValues.extractValue(fullPath(), sourceAsMap); + return SemanticTextField.nodeStringValues(fullPath(), ret); } @Override @@ -429,28 +401,25 @@ protected void doValidate(MappingLookup mappers) { } } - public static class SemanticTextFieldType extends SimpleMappedFieldType { + public static class SemanticTextFieldType extends AbstractSemanticTextFieldType { private final String inferenceId; private final String searchInferenceId; private final SemanticTextField.ModelSettings modelSettings; - private final ObjectMapper inferenceField; - private final IndexVersion indexVersionCreated; + private final NestedObjectMapper chunksField; public SemanticTextFieldType( String name, String inferenceId, String searchInferenceId, SemanticTextField.ModelSettings modelSettings, - ObjectMapper inferenceField, - IndexVersion indexVersionCreated, + NestedObjectMapper chunksField, Map meta ) { super(name, true, false, false, TextSearchInfo.NONE, meta); this.inferenceId = inferenceId; this.searchInferenceId = searchInferenceId; this.modelSettings = modelSettings; - this.inferenceField = inferenceField; - this.indexVersionCreated = indexVersionCreated; + this.chunksField = chunksField; } @Override @@ -470,16 +439,16 @@ public SemanticTextField.ModelSettings getModelSettings() { return modelSettings; } - public ObjectMapper getInferenceField() { - return inferenceField; + public NestedObjectMapper getChunksField() { + return chunksField; } - public NestedObjectMapper getChunksField() { - return (NestedObjectMapper) 
inferenceField.getMapper(CHUNKS_FIELD); + public FieldMapper getOffsetsField() { + return (FieldMapper) getChunksField().getMapper(SemanticTextField.CHUNKED_OFFSET_FIELD); } public FieldMapper getEmbeddingsField() { - return (FieldMapper) getChunksField().getMapper(CHUNKED_EMBEDDINGS_FIELD); + return (FieldMapper) getChunksField().getMapper(SemanticTextField.CHUNKED_EMBEDDINGS_FIELD); } @Override @@ -495,7 +464,7 @@ public Query existsQuery(SearchExecutionContext context) { return NestedQueryBuilder.toQuery( (c -> getEmbeddingsField().fieldType().existsQuery(c)), - getChunksFieldName(name()), + SemanticTextField.getChunksFieldName(name()), ScoreMode.None, false, context @@ -504,10 +473,7 @@ public Query existsQuery(SearchExecutionContext context) { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - String fieldName = context.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.INFERENCE_METADATA_FIELDS) - ? name() - : getOriginalTextFieldName(name()); - return SourceValueFetcher.toString(fieldName, context, format); + return SourceValueFetcher.toString(name(), context, format); } @Override @@ -517,12 +483,12 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext @Override public boolean fieldHasValue(FieldInfos fieldInfos) { - return fieldInfos.fieldInfo(getEmbeddingsFieldName(name())) != null; + return fieldInfos.fieldInfo(SemanticTextField.getEmbeddingsFieldName(name())) != null; } public QueryBuilder semanticQuery(InferenceResults inferenceResults, Integer requestSize, float boost, String queryName) { - String nestedFieldPath = getChunksFieldName(name()); - String inferenceResultsFieldName = getEmbeddingsFieldName(name()); + String nestedFieldPath = SemanticTextField.getChunksFieldName(name()); + String inferenceResultsFieldName = SemanticTextField.getEmbeddingsFieldName(name()); QueryBuilder childQueryBuilder; if (modelSettings == null) { @@ -537,17 +503,15 @@ public QueryBuilder semanticQuery(InferenceResults inferenceResults, Integer req ); } - // TODO: Use WeightedTokensQueryBuilder TextExpansionResults textExpansionResults = (TextExpansionResults) inferenceResults; - var boolQuery = QueryBuilders.boolQuery(); - for (var weightedToken : textExpansionResults.getWeightedTokens()) { - boolQuery.should( - QueryBuilders.termQuery(inferenceResultsFieldName, weightedToken.token()).boost(weightedToken.weight()) - ); - } - boolQuery.minimumShouldMatch(1); - - yield boolQuery; + yield new SparseVectorQueryBuilder( + inferenceResultsFieldName, + textExpansionResults.getWeightedTokens(), + null, + null, + null, + null + ); } case TEXT_EMBEDDING -> { if (inferenceResults instanceof MlTextEmbeddingResults == false) { @@ -637,156 +601,32 @@ public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) { } } - /** - *
- * <p>
- * Insert or replace the path's value in the map with the provided new value. The map will be modified in-place.
- * If the complete path does not exist in the map, it will be added to the deepest (sub-)map possible.
- * </p>
- * <p>
- * For example, given the map:
- * </p>
- * <pre>
-     * {
-     *   "path1": {
-     *     "path2": {
-     *       "key1": "value1"
-     *     }
-     *   }
-     * }
-     * </pre>
- * <p>
- * And the caller wanted to insert {@code "path1.path2.path3.key2": "value2"}, the method would emit the modified map:
- * </p>
- * <pre>
-     * {
-     *   "path1": {
-     *     "path2": {
-     *       "key1": "value1",
-     *       "path3.key2": "value2"
-     *     }
-     *   }
-     * }
-     * </pre>
- * - * @param path the value's path in the map. - * @param map the map to search and modify in-place. - * @param newValue the new value to assign to the path. - * - * @throws IllegalArgumentException If either the path cannot be fully traversed or there is ambiguity about where to insert the new - * value. - */ - public static void insertValue(String path, Map map, Object newValue) { - String[] pathElements = path.split("\\."); - if (pathElements.length == 0) { - return; - } - - List suffixMaps = extractSuffixMaps(pathElements, 0, map); - if (suffixMaps.isEmpty()) { - // This should never happen. Throw in case it does for some reason. - throw new IllegalStateException("extractSuffixMaps returned an empty suffix map list"); - } else if (suffixMaps.size() == 1) { - SuffixMap suffixMap = suffixMaps.getFirst(); - suffixMap.map().put(suffixMap.suffix(), newValue); - } else { - throw new IllegalArgumentException( - "Path [" + path + "] could be inserted in " + suffixMaps.size() + " distinct ways, it is ambiguous which one to use" - ); - } - } - - private record SuffixMap(String suffix, Map map) {} - - private static List extractSuffixMaps(String[] pathElements, int index, Object currentValue) { - if (currentValue instanceof List valueList) { - List suffixMaps = new ArrayList<>(valueList.size()); - for (Object o : valueList) { - suffixMaps.addAll(extractSuffixMaps(pathElements, index, o)); - } - - return suffixMaps; - } else if (currentValue instanceof Map) { - @SuppressWarnings("unchecked") - Map map = (Map) currentValue; - List suffixMaps = new ArrayList<>(map.size()); - - String key = pathElements[index]; - while (index < pathElements.length) { - if (map.containsKey(key)) { - if (index + 1 == pathElements.length) { - // We found the complete path - suffixMaps.add(new SuffixMap(key, map)); - } else { - // We've matched that path partially, keep traversing to try to match it fully - suffixMaps.addAll(extractSuffixMaps(pathElements, index + 1, map.get(key))); - } - } - - if (++index < pathElements.length) { - key += "." + pathElements[index]; - } - } - - if (suffixMaps.isEmpty()) { - // We checked for all remaining elements in the path, and they do not exist. This means we found a leaf map that we should - // add the value to. 
- suffixMaps.add(new SuffixMap(key, map)); - } - - return suffixMaps; - } else { - throw new IllegalArgumentException( - "Path [" - + String.join(".", Arrays.copyOfRange(pathElements, 0, index)) - + "] has value [" - + currentValue - + "] of type [" - + currentValue.getClass().getSimpleName() - + "], which cannot be traversed into further" - ); - } - } - - private static ObjectMapper createInferenceField( - MapperBuilderContext context, - IndexVersion indexVersionCreated, - @Nullable SemanticTextField.ModelSettings modelSettings, - Function bitSetProducer, - IndexSettings indexSettings - ) { - return new ObjectMapper.Builder(INFERENCE_FIELD, Optional.of(ObjectMapper.Subobjects.ENABLED)).dynamic(ObjectMapper.Dynamic.FALSE) - .add(createChunksField(indexVersionCreated, modelSettings, bitSetProducer, indexSettings)) - .build(context); - } - private static NestedObjectMapper.Builder createChunksField( - IndexVersion indexVersionCreated, @Nullable SemanticTextField.ModelSettings modelSettings, Function bitSetProducer, IndexSettings indexSettings ) { NestedObjectMapper.Builder chunksField = new NestedObjectMapper.Builder( - CHUNKS_FIELD, - indexVersionCreated, + SemanticTextField.CHUNKS_FIELD, + indexSettings.getIndexVersionCreated(), bitSetProducer, indexSettings ); chunksField.dynamic(ObjectMapper.Dynamic.FALSE); - KeywordFieldMapper.Builder chunkTextField = new KeywordFieldMapper.Builder(CHUNKED_TEXT_FIELD, indexVersionCreated).indexed(false) - .docValues(false); + if (modelSettings != null) { - chunksField.add(createEmbeddingsField(indexVersionCreated, modelSettings)); + chunksField.add(createEmbeddingsField(indexSettings.getIndexVersionCreated(), modelSettings)); } - chunksField.add(chunkTextField); + chunksField.add(new OffsetSourceFieldMapper.Builder(SemanticTextField.CHUNKED_OFFSET_FIELD)); return chunksField; } private static Mapper.Builder createEmbeddingsField(IndexVersion indexVersionCreated, SemanticTextField.ModelSettings modelSettings) { return switch (modelSettings.taskType()) { - case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(CHUNKED_EMBEDDINGS_FIELD); + case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(SemanticTextField.CHUNKED_EMBEDDINGS_FIELD); case TEXT_EMBEDDING -> { DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder( - CHUNKED_EMBEDDINGS_FIELD, + SemanticTextField.CHUNKED_EMBEDDINGS_FIELD, indexVersionCreated ); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index d648db2fbfdbc..46c6da34a8f33 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -37,7 +37,7 @@ import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults; -import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; +import org.elasticsearch.xpack.inference.mapper.AbstractSemanticTextFieldType; import java.io.IOException; import java.util.Collection; @@ -162,7 +162,7 @@ private QueryBuilder doRewriteBuildSemanticQuery(SearchExecutionContext searchEx MappedFieldType fieldType = 
searchExecutionContext.getFieldType(fieldName); if (fieldType == null) { return new MatchNoneQueryBuilder(); - } else if (fieldType instanceof SemanticTextFieldMapper.SemanticTextFieldType semanticTextFieldType) { + } else if (fieldType instanceof AbstractSemanticTextFieldType semanticTextFieldType) { if (inferenceResults == null) { // This should never happen, but throw on it in case it ever does throw new IllegalStateException( diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SparseVectorQuery.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SparseVectorQuery.java new file mode 100644 index 0000000000000..32e4623454f17 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SparseVectorQuery.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.queries; + +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; + +import java.io.IOException; +import java.util.Objects; + +public class SparseVectorQuery extends Query { + private final String fieldName; + private final Query termsQuery; + + public SparseVectorQuery(String fieldName, Query termsQuery) { + this.fieldName = fieldName; + this.termsQuery = termsQuery; + } + + public Query getTermsQuery() { + return termsQuery; + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + var rewrite = termsQuery.rewrite(indexSearcher); + if (rewrite != termsQuery) { + return new SparseVectorQuery(fieldName, rewrite); + } + return this; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return termsQuery.createWeight(searcher, scoreMode, boost); + } + + @Override + public String toString(String field) { + return termsQuery.toString(field); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(fieldName)) { + termsQuery.visit(visitor.getSubVisitor(BooleanClause.Occur.MUST, this)); + } + } + + @Override + public boolean equals(Object obj) { + if (sameClassAs(obj) == false) { + return false; + } + SparseVectorQuery that = (SparseVectorQuery) obj; + return fieldName.equals(that.fieldName) && termsQuery.equals(that.termsQuery); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), fieldName, termsQuery); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SparseVectorQueryBuilder.java similarity index 98% rename from x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SparseVectorQueryBuilder.java index 5a63ad8e85e9b..752009b7b910a 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SparseVectorQueryBuilder.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.inference.queries; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; @@ -33,9 +33,7 @@ import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensUtils; import java.io.IOException; import java.util.ArrayList; @@ -210,7 +208,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { return (shouldPruneTokens) ? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, queryVectors, ft, context) - : WeightedTokensUtils.queryBuilderWithAllTokens(queryVectors, ft, context); + : WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, ft, context); } @Override diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/TextExpansionQueryBuilder.java similarity index 98% rename from x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/TextExpansionQueryBuilder.java index 6d972bcf5863a..be435bd18b55c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/TextExpansionQueryBuilder.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.inference.queries; import org.apache.lucene.search.Query; import org.apache.lucene.util.SetOnce; @@ -32,8 +32,6 @@ import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; import java.io.IOException; import java.util.List; @@ -41,7 +39,7 @@ import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN; import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; -import static org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder.PRUNING_CONFIG; +import static org.elasticsearch.xpack.inference.queries.WeightedTokensQueryBuilder.PRUNING_CONFIG; /** * @deprecated Replaced by sparse_vector query diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TokenPruningConfig.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/TokenPruningConfig.java similarity index 98% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TokenPruningConfig.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/TokenPruningConfig.java index 13358839830ed..6f5c2995af8b8 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TokenPruningConfig.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/TokenPruningConfig.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.core.ml.search; +package org.elasticsearch.xpack.inference.queries; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; @@ -22,7 +22,7 @@ import java.util.Objects; import java.util.Set; -import static org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder.PRUNING_CONFIG; +import static org.elasticsearch.xpack.inference.queries.WeightedTokensQueryBuilder.PRUNING_CONFIG; public class TokenPruningConfig implements Writeable, ToXContentObject { public static final ParseField TOKENS_FREQ_RATIO_THRESHOLD = new ParseField("tokens_freq_ratio_threshold"); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/WeightedTokensQueryBuilder.java similarity index 97% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/WeightedTokensQueryBuilder.java index 256c90c3eaa62..8246b8a399310 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/WeightedTokensQueryBuilder.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.core.ml.search; +package org.elasticsearch.xpack.inference.queries; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; @@ -24,6 +24,7 @@ import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xpack.core.ml.search.WeightedToken; import java.io.IOException; import java.util.ArrayList; @@ -125,7 +126,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { } return (this.tokenPruningConfig == null) - ? WeightedTokensUtils.queryBuilderWithAllTokens(tokens, ft, context) + ? WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, tokens, ft, context) : WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, tokens, ft, context); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/WeightedTokensUtils.java similarity index 89% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/WeightedTokensUtils.java index 133920416d227..0d3e628cea07a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/WeightedTokensUtils.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.core.ml.search; +package org.elasticsearch.xpack.inference.queries; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -16,6 +16,7 @@ import org.apache.lucene.search.Query; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.xpack.core.ml.search.WeightedToken; import java.io.IOException; import java.util.List; @@ -24,13 +25,18 @@ public final class WeightedTokensUtils { private WeightedTokensUtils() {} - public static Query queryBuilderWithAllTokens(List tokens, MappedFieldType ft, SearchExecutionContext context) { + public static Query queryBuilderWithAllTokens( + String fieldName, + List tokens, + MappedFieldType ft, + SearchExecutionContext context + ) { var qb = new BooleanQuery.Builder(); for (var token : tokens) { qb.add(new BoostQuery(ft.termQuery(token.token(), context), token.weight()), BooleanClause.Occur.SHOULD); } - return qb.setMinimumNumberShouldMatch(1).build(); + return new SparseVectorQuery(fieldName, qb.setMinimumNumberShouldMatch(1).build()); } public static Query queryBuilderWithPrunedTokens( @@ -64,7 +70,7 @@ public static Query queryBuilderWithPrunedTokens( } } - return qb.setMinimumNumberShouldMatch(1).build(); + return new SparseVectorQuery(fieldName, qb.setMinimumNumberShouldMatch(1).build()); } /** diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java index 7eb5bfbe5a055..b70a3b0e6f39b 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java @@ -355,13 +355,7 @@ private static BulkItemRequest[] randomBulkItemRequest( // embeddings were overwritten. if (model.hasResult(inputText)) { ChunkedInferenceServiceResults results = model.getResults(inputText); - semanticTextField = semanticTextFieldFromChunkedInferenceResults( - field, - model, - List.of(inputText), - results, - requestContentType - ); + semanticTextField = semanticTextFieldFromChunkedInferenceResults(field, model, inputText, results, requestContentType); } else { semanticTextField = randomSemanticText(field, model, List.of(inputText), requestContentType); model.putResult(inputText, toChunkedResult(semanticTextField)); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java new file mode 100644 index 0000000000000..bb297009d76ba --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java @@ -0,0 +1,572 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.highlight; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.join.ScoreMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.elasticsearch.action.OriginalIndices; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.Streams; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperServiceTestCase; +import org.elasticsearch.index.mapper.SourceToParse; +import org.elasticsearch.index.query.NestedQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.fetch.FetchContext; +import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; +import org.elasticsearch.search.internal.AliasFilter; +import org.elasticsearch.search.internal.ShardSearchRequest; +import org.elasticsearch.search.lookup.Source; +import org.elasticsearch.search.rank.RankDoc; +import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; +import org.elasticsearch.xcontent.XContentType; +import 
org.elasticsearch.xpack.inference.InferencePlugin; +import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; +import org.junit.Before; +import org.mockito.Mockito; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.mockito.Mockito.mock; + +public class SemanticTextHighlighterTests extends MapperServiceTestCase { + private MapperService mapperService; + private DocumentMapper documentMapper; + + private static final String MAPPINGS = """ + { + "_doc": { + "properties": { + "field": { + "type": "text", + "copy_to": ["sparse_field", "dense_field"] + }, + "sparse_field": { + "type": "semantic_text", + "inference_id": ".elser-2-elasticsearch", + "model_settings": { + "task_type": "sparse_embedding" + } + }, + "dense_field": { + "type": "semantic_text", + "inference_id": ".multilingual-e5-small-elasticsearch", + "model_settings": { + "task_type": "text_embedding", + "dimensions": 384, + "similarity": "cosine", + "element_type": "float" + } + } + } + } + } + """; + + @Override + protected Collection getPlugins() { + return List.of(new InferencePlugin(Settings.EMPTY)); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + mapperService = createMapperService(MAPPINGS); + documentMapper = mapperService.documentMapper(); + } + + private void assertHighlightOneDoc(ShardSearchRequest request, SourceToParse source, String fieldName, String[] expectedPassages) + throws Exception { + SemanticTextFieldMapper fieldMapper = (SemanticTextFieldMapper) mapperService.mappingLookup().getMapper(fieldName); + var doc = documentMapper.parse(source); + assertNull(doc.dynamicMappingsUpdate()); + try (Directory dir = newDirectory()) { + IndexWriterConfig iwc = newIndexWriterConfig(new StandardAnalyzer()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); + iw.addDocuments(doc.docs()); + try (DirectoryReader reader = wrapInMockESDirectoryReader(iw.getReader())) { + IndexSearcher searcher = newSearcher(reader); + iw.close(); + TopDocs topDocs = searcher.search(Queries.newNonNestedFilter(IndexVersion.current()), 1, Sort.INDEXORDER); + assertThat(topDocs.totalHits.value(), equalTo(1L)); + int docID = topDocs.scoreDocs[0].doc; + SemanticTextHighlighter highlighter = new SemanticTextHighlighter(); + var execContext = createSearchExecutionContext(mapperService); + var luceneQuery = execContext.toQuery(request.source().query()).query(); + FetchContext fetchContext = mock(FetchContext.class); + Mockito.when(fetchContext.highlight()).thenReturn(new SearchHighlightContext(Collections.emptyList())); + Mockito.when(fetchContext.query()).thenReturn(luceneQuery); + Mockito.when(fetchContext.request()).thenReturn(request); + Mockito.when(fetchContext.getSearchExecutionContext()).thenReturn(execContext); + + FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext( + new SearchHit(docID), + getOnlyLeafReader(reader).getContext(), + docID, + Map.of(), + Source.fromBytes(source.source()), + new RankDoc(docID, Float.NaN, 0) + ); + try { + var highlightContext = new HighlightBuilder().field(fieldName, 512, 1).highlighterType("semantic").build(execContext); + + for (var fieldContext : highlightContext.fields()) { + FieldHighlightContext context = new FieldHighlightContext( + fieldName, + fieldContext, + fieldMapper.fieldType(), + fetchContext, + hitContext, + luceneQuery, + new HashMap<>() + ); + var result = 
highlighter.highlight(context); + System.out.println(Strings.toString(result, true, true)); + } + } finally { + hitContext.hit().decRef(); + } + } + } + } + + private SearchRequest createSearchRequest(QueryBuilder queryBuilder) { + SearchRequest request = new SearchRequest(); + request.source(new SearchSourceBuilder()); + request.allowPartialSearchResults(false); + request.source().query(queryBuilder); + return request; + } + + private ShardSearchRequest createShardSearchRequest(QueryBuilder queryBuilder) { + SearchRequest request = createSearchRequest(queryBuilder); + return new ShardSearchRequest(OriginalIndices.NONE, request, new ShardId("index", "index", 0), 0, 1, AliasFilter.EMPTY, 1, 0, null); + } + + public void testDenseVector() throws Exception { + float[] vector = new float[] { + 0.09475211f, + 0.044564713f, + -0.04378501f, + -0.07908551f, + 0.04332011f, + -0.03891992f, + -0.0062305215f, + 0.024245035f, + -0.008976331f, + 0.032832284f, + 0.052760173f, + 0.008123907f, + 0.09049037f, + -0.01637332f, + -0.054353267f, + 0.00771307f, + 0.08545496f, + -0.079716265f, + -0.045666866f, + -0.04369993f, + 0.009189822f, + -0.013782891f, + -0.07701858f, + 0.037278354f, + 0.049807206f, + 0.078036495f, + -0.059533164f, + 0.051413406f, + 0.040234447f, + -0.038139492f, + -0.085189626f, + -0.045546446f, + 0.0544375f, + -0.05604156f, + 0.057408098f, + 0.041913517f, + -0.037348013f, + -0.025998272f, + 0.08486864f, + -0.046678443f, + 0.0041820924f, + 0.007514462f, + 0.06424746f, + 0.044233218f, + 0.103267275f, + 0.014130771f, + -0.049954403f, + 0.04226959f, + -0.08346965f, + -0.01639249f, + -0.060537644f, + 0.04546336f, + 0.012866155f, + 0.05375096f, + 0.036775924f, + -0.0762226f, + -0.037304543f, + -0.05692274f, + -0.055807598f, + 0.0040082196f, + 0.059259634f, + 0.012022011f, + -8.0863154E-4f, + 0.0070405705f, + 0.050255686f, + 0.06810016f, + 0.017190414f, + 0.051975194f, + -0.051436286f, + 0.023408439f, + -0.029802637f, + 0.034137156f, + -0.004660689f, + -0.0442122f, + 0.019065322f, + 0.030806554f, + 0.0064652697f, + -0.066789865f, + 0.057111286f, + 0.009412479f, + -0.041444767f, + -0.06807582f, + -0.085881524f, + 0.04901128f, + -0.047871742f, + 0.06328623f, + 0.040418074f, + -0.081432894f, + 0.058384005f, + 0.006206527f, + 0.045801315f, + 0.037274595f, + -0.054337103f, + -0.06755516f, + -0.07396888f, + -0.043732334f, + -0.052053086f, + 0.03210978f, + 0.048101492f, + -0.083828256f, + 0.05205026f, + -0.048474856f, + 0.029116616f, + -0.10924888f, + 0.003796487f, + 0.030567763f, + 0.026949523f, + -0.052353345f, + 0.043198872f, + -0.09456988f, + -0.05711594f, + -2.2292069E-4f, + 0.032972734f, + 0.054394923f, + -0.0767535f, + -0.02710579f, + -0.032135617f, + -0.01732382f, + 0.059442326f, + -0.07686165f, + 0.07104082f, + -0.03090021f, + -0.05450075f, + -0.038997203f, + -0.07045443f, + 0.00483161f, + 0.010933604f, + 0.020874644f, + 0.037941266f, + 0.019729063f, + 0.06178368f, + 0.013503478f, + -0.008584046f, + 0.045592044f, + 0.05528768f, + 0.11568184f, + 0.0041300594f, + 0.015404516f, + -3.8067883E-4f, + -0.06365399f, + -0.07826643f, + 0.061575573f, + -0.060548335f, + 0.05706082f, + 0.042301804f, + 0.052173313f, + 0.07193179f, + -0.03839231f, + 0.0734415f, + -0.045380164f, + 0.02832276f, + 0.003745178f, + 0.058844633f, + 0.04307504f, + 0.037800383f, + -0.031050054f, + -0.06856359f, + -0.059114788f, + -0.02148857f, + 0.07854358f, + -0.03253363f, + -0.04566468f, + -0.019933948f, + -0.057993464f, + -0.08677458f, + -0.06626883f, + 0.031657256f, + 0.101128764f, + -0.08050056f, + -0.050226066f, + 
-0.014335166f, + 0.050344367f, + -0.06851419f, + 0.008698909f, + -0.011893435f, + 0.07741272f, + -0.059579294f, + 0.03250109f, + 0.058700256f, + 0.046834726f, + -0.035081457f, + -0.0043140925f, + -0.09764087f, + -0.0034994273f, + -0.034056358f, + -0.019066337f, + -0.034376107f, + 0.012964423f, + 0.029291175f, + -0.012090671f, + 0.021585712f, + 0.028859599f, + -0.04391145f, + -0.071166754f, + -0.031040335f, + 0.02808108f, + -0.05621317f, + 0.06543945f, + 0.10094665f, + 0.041057374f, + -0.03222324f, + -0.063366964f, + 0.064944476f, + 0.023641933f, + 0.06806713f, + 0.06806097f, + -0.08220105f, + 0.04148528f, + -0.09254079f, + 0.044620737f, + 0.05526614f, + -0.03849534f, + -0.04722273f, + 0.0670776f, + -0.024274077f, + -0.016903497f, + 0.07584147f, + 0.04760533f, + -0.038843267f, + -0.028365409f, + 0.08022705f, + -0.039916333f, + 0.049067073f, + -0.030701574f, + -0.057169467f, + 0.043025102f, + 0.07109674f, + -0.047296863f, + -0.047463104f, + 0.040868305f, + -0.04409507f, + -0.034977127f, + -0.057109762f, + -0.08616165f, + -0.03486079f, + -0.046201482f, + 0.025963873f, + 0.023392359f, + 0.09594902f, + -0.007847159f, + -0.021231368f, + 0.009007263f, + 0.0032713825f, + -0.06876065f, + 0.03169641f, + -7.2582875E-4f, + -0.07049708f, + 0.03900843f, + -0.0075472407f, + 0.05184822f, + 0.06452079f, + -0.09832754f, + -0.012775799f, + -0.03925948f, + -0.029761659f, + 0.0065437574f, + 0.0815465f, + 0.0411695f, + -0.0702844f, + -0.009533786f, + 0.07024532f, + 0.0098710675f, + 0.09915362f, + 0.0415453f, + 0.050641853f, + 0.047463298f, + -0.058609713f, + -0.029499197f, + -0.05100956f, + -0.03441709f, + -0.06348122f, + 0.014784361f, + 0.056317374f, + -0.10280704f, + -0.04008354f, + -0.018926824f, + 0.08832836f, + 0.124804f, + -0.047645308f, + -0.07122146f, + -9.886527E-4f, + 0.03850324f, + 0.048501793f, + 0.07072816f, + 0.06566776f, + -0.013678872f, + 0.010010848f, + 0.06483413f, + -0.030036367f, + -0.029748922f, + -0.007482364f, + -0.05180385f, + 0.03698522f, + -0.045453787f, + 0.056604166f, + 0.029394176f, + 0.028589265f, + -0.012185886f, + -0.06919616f, + 0.0711641f, + -0.034055933f, + -0.053101335f, + 0.062319f, + 0.021600349f, + -0.038718067f, + 0.060814686f, + 0.05087301f, + -0.020297311f, + 0.016493896f, + 0.032162152f, + 0.046740912f, + 0.05461355f, + -0.07024665f, + 0.025609337f, + -0.02504801f, + 0.06765588f, + -0.032994855f, + -0.037897404f, + -0.045783922f, + -0.05689299f, + -0.040437017f, + -0.07904339f, + -0.031415287f, + -0.029216278f, + 0.017395392f, + 0.03449264f, + -0.025653394f, + -0.06283088f, + 0.049027324f, + 0.016229525f, + -0.00985347f, + -0.053974394f, + -0.030257035f, + 0.04325515f, + -0.012293731f, + -0.002446129f, + -0.05567076f, + 0.06374684f, + -0.03153897f, + -0.04475149f, + 0.018582936f, + 0.025716115f, + -0.061778374f, + 0.04196277f, + -0.04134671f, + -0.07396272f, + 0.05846184f, + 0.006558759f, + -0.09745666f, + 0.07587805f, + 0.0137483915f, + -0.100933895f, + 0.032008193f, + 0.04293283f, + 0.017870268f, + 0.032806385f, + -0.0635923f, + -0.019672254f, + 0.022225974f, + 0.04304554f, + -0.06043949f, + -0.0285274f, + 0.050868835f, + 0.057003833f, + 0.05740866f, + 0.020068677f, + -0.034312245f, + -0.021671802f, + 0.014769731f, + -0.07328285f, + -0.009586734f, + 0.036420938f, + -0.022188472f, + -0.008200541f, + -0.010765854f, + -0.06949713f, + -0.07555878f, + 0.045306854f, + -0.05424466f, + -0.03647476f, + 0.06266633f, + 0.08346125f, + 0.060288202f, + 0.0548457f }; + KnnVectorQueryBuilder knnQuery = new KnnVectorQueryBuilder("dense_field.chunks.embeddings", vector, 10, 10, null); 
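+ // The knn query targets the per-chunk embeddings under the nested chunks field, so it is wrapped in a nested query below (ScoreMode.Max) to score the parent document by its best-matching chunk.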
+ NestedQueryBuilder nestedQueryBuilder = new NestedQueryBuilder("dense_field.chunks", knnQuery, ScoreMode.Max); + var shardRequest = createShardSearchRequest(nestedQueryBuilder); + var sourceToParse = new SourceToParse( + "0", + Streams.readFully(SemanticTextHighlighterTests.class.getResourceAsStream("moby-dick.json")), + XContentType.JSON + ); + assertHighlightOneDoc(shardRequest, sourceToParse, "dense_field", new String[0]); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldMapperTests.java new file mode 100644 index 0000000000000..3e1b5d7e611d1 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldMapperTests.java @@ -0,0 +1,1227 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.mapper; + +import org.apache.lucene.document.FeatureField; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.search.join.QueryBitSetProducer; +import org.apache.lucene.search.join.ScoreMode; +import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.CheckedBiConsumer; +import org.elasticsearch.common.CheckedBiFunction; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.DocumentParsingException; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.LuceneDocument; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperTestCase; +import org.elasticsearch.index.mapper.NestedLookup; +import org.elasticsearch.index.mapper.NestedObjectMapper; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.mapper.SourceToParse; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; +import org.elasticsearch.index.query.SearchExecutionContext; +import 
org.elasticsearch.index.search.ESToParentBlockJoinQuery; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.LeafNestedDocuments; +import org.elasticsearch.search.NestedDocuments; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.index.IndexVersionUtils; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.json.JsonXContent; +import org.elasticsearch.xpack.inference.InferencePlugin; +import org.elasticsearch.xpack.inference.model.TestModel; +import org.junit.AssumptionViolatedException; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.stream.Stream; + +import static java.util.Collections.singletonList; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextField.CHUNKED_EMBEDDINGS_FIELD; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextField.CHUNKED_TEXT_FIELD; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextField.CHUNKS_FIELD; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextField.INFERENCE_FIELD; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextField.INFERENCE_ID_FIELD; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextField.MODEL_SETTINGS_FIELD; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextField.SEARCH_INFERENCE_ID_FIELD; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextField.getChunksFieldName; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextField.getEmbeddingsFieldName; +import static org.elasticsearch.xpack.inference.mapper.LegacySemanticTextFieldTests.randomSemanticText; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +public class LegacySemanticTextFieldMapperTests extends MapperTestCase { + @Override + protected Collection getPlugins() { + return singletonList(new InferencePlugin(Settings.EMPTY)); + } + + @Override + protected Settings getIndexSettings() { + return Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersionUtils.getPreviousVersion(IndexVersions.INFERENCE_METADATA_FIELDS)) + .build(); + } + + @Override + protected IndexVersion getVersion() { + return IndexVersionUtils.getPreviousVersion(IndexVersions.INFERENCE_METADATA_FIELDS); + } + + @Override + protected void minimalMapping(XContentBuilder b) throws IOException { + b.field("type", "semantic_text"); + } + + @Override + protected String minimalIsInvalidRoutingPathErrorMessage(Mapper mapper) { + return "cannot have nested fields when index is in [index.mode=time_series]"; + } + + @Override + protected void metaMapping(XContentBuilder b) throws IOException { + super.metaMapping(b); + b.field(INFERENCE_ID_FIELD, DEFAULT_ELSER_2_INFERENCE_ID); + } + + 
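+ // The legacy semantic_text field has no scalar sample value and produces no directly indexable fields, so the base-class hooks below opt out of the generic value, ignore_malformed, and stored-field checks.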
@Override + protected Object getSampleValueForDocument() { + return null; + } + + @Override + protected boolean supportsIgnoreMalformed() { + return false; + } + + @Override + protected boolean supportsStoredFields() { + return false; + } + + @Override + protected void registerParameters(ParameterChecker checker) throws IOException {} + + @Override + protected Object generateRandomInputValue(MappedFieldType ft) { + assumeFalse("doc_values are not supported in semantic_text", true); + return null; + } + + @Override + protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) { + throw new AssumptionViolatedException("not supported"); + } + + @Override + protected IngestScriptSupport ingestScriptSupport() { + throw new AssumptionViolatedException("not supported"); + } + + @Override + public MappedFieldType getMappedFieldType() { + return new LegacySemanticTextFieldMapper.LegacySemanticTextFieldType( + "field", + "fake-inference-id", + null, + null, + null, + IndexVersionUtils.randomVersionBetween( + random(), + IndexVersionUtils.getFirstVersion(), + IndexVersionUtils.getPreviousVersion(IndexVersions.INFERENCE_METADATA_FIELDS) + ), + Map.of() + ); + } + + @Override + protected void assertSearchable(MappedFieldType fieldType) { + assertThat(fieldType, instanceOf(LegacySemanticTextFieldMapper.LegacySemanticTextFieldType.class)); + assertTrue(fieldType.isIndexed()); + assertTrue(fieldType.isSearchable()); + } + + public void testDefaults() throws Exception { + final String fieldName = "field"; + final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping); + final XContentBuilder expectedMapping = fieldMapping(this::metaMapping); + + MapperService mapperService = createMapperService(fieldMapping); + DocumentMapper mapper = mapperService.documentMapper(); + assertEquals(Strings.toString(expectedMapping), mapper.mappingSource().toString()); + assertSemanticTextField(mapperService, fieldName, false); + assertInferenceEndpoints(mapperService, fieldName, DEFAULT_ELSER_2_INFERENCE_ID, DEFAULT_ELSER_2_INFERENCE_ID); + + ParsedDocument doc1 = mapper.parse(source(this::writeField)); + List fields = doc1.rootDoc().getFields("field"); + + // No indexable fields + assertTrue(fields.isEmpty()); + } + + @Override + public void testFieldHasValue() { + MappedFieldType fieldType = getMappedFieldType(); + FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { getFieldInfoWithName(getEmbeddingsFieldName("field")) }); + assertTrue(fieldType.fieldHasValue(fieldInfos)); + } + + public void testSetInferenceEndpoints() throws IOException { + final String fieldName = "field"; + final String inferenceId = "foo"; + final String searchInferenceId = "bar"; + + CheckedBiConsumer assertSerialization = (expectedMapping, mapperService) -> { + DocumentMapper mapper = mapperService.documentMapper(); + assertEquals(Strings.toString(expectedMapping), mapper.mappingSource().toString()); + }; + + { + final XContentBuilder fieldMapping = fieldMapping(b -> b.field("type", "semantic_text").field(INFERENCE_ID_FIELD, inferenceId)); + final MapperService mapperService = createMapperService(fieldMapping); + assertSemanticTextField(mapperService, fieldName, false); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); + assertSerialization.accept(fieldMapping, mapperService); + } + { + final XContentBuilder fieldMapping = fieldMapping( + b -> b.field("type", "semantic_text").field(SEARCH_INFERENCE_ID_FIELD, searchInferenceId) + ); + final XContentBuilder expectedMapping = 
fieldMapping( + b -> b.field("type", "semantic_text") + .field(INFERENCE_ID_FIELD, DEFAULT_ELSER_2_INFERENCE_ID) + .field(SEARCH_INFERENCE_ID_FIELD, searchInferenceId) + ); + final MapperService mapperService = createMapperService(fieldMapping); + assertSemanticTextField(mapperService, fieldName, false); + assertInferenceEndpoints(mapperService, fieldName, DEFAULT_ELSER_2_INFERENCE_ID, searchInferenceId); + assertSerialization.accept(expectedMapping, mapperService); + } + { + final XContentBuilder fieldMapping = fieldMapping( + b -> b.field("type", "semantic_text") + .field(INFERENCE_ID_FIELD, inferenceId) + .field(SEARCH_INFERENCE_ID_FIELD, searchInferenceId) + ); + MapperService mapperService = createMapperService(fieldMapping); + assertSemanticTextField(mapperService, fieldName, false); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId); + assertSerialization.accept(fieldMapping, mapperService); + } + } + + public void testInvalidInferenceEndpoints() { + { + Exception e = expectThrows( + MapperParsingException.class, + () -> createMapperService(fieldMapping(b -> b.field("type", "semantic_text").field(INFERENCE_ID_FIELD, (String) null))) + ); + assertThat( + e.getMessage(), + containsString("[inference_id] on mapper [field] of type [semantic_text] must not have a [null] value") + ); + } + { + Exception e = expectThrows( + MapperParsingException.class, + () -> createMapperService(fieldMapping(b -> b.field("type", "semantic_text").field(INFERENCE_ID_FIELD, ""))) + ); + assertThat(e.getMessage(), containsString("[inference_id] on mapper [field] of type [semantic_text] must not be empty")); + } + { + Exception e = expectThrows( + MapperParsingException.class, + () -> createMapperService(fieldMapping(b -> b.field("type", "semantic_text").field(SEARCH_INFERENCE_ID_FIELD, ""))) + ); + assertThat(e.getMessage(), containsString("[search_inference_id] on mapper [field] of type [semantic_text] must not be empty")); + } + } + + public void testCannotBeUsedInMultiFields() { + Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "text"); + b.startObject("fields"); + b.startObject("semantic"); + b.field("type", "semantic_text"); + b.field("inference_id", "my_inference_id"); + b.endObject(); + b.endObject(); + }))); + assertThat(e.getMessage(), containsString("Field [semantic] of type [semantic_text] can't be used in multifields")); + } + + public void testUpdatesToInferenceIdNotSupported() throws IOException { + String fieldName = randomAlphaOfLengthBetween(5, 15); + MapperService mapperService = createMapperService( + mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()) + ); + assertSemanticTextField(mapperService, fieldName, false); + Exception e = expectThrows( + IllegalArgumentException.class, + () -> merge( + mapperService, + mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "another_model").endObject()) + ) + ); + assertThat(e.getMessage(), containsString("Cannot update parameter [inference_id] from [test_model] to [another_model]")); + } + + public void testDynamicUpdate() throws IOException { + final String fieldName = "semantic"; + final String inferenceId = "test_service"; + final String searchInferenceId = "search_test_service"; + + { + MapperService mapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + new 
LegacySemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null) + ); + assertSemanticTextField(mapperService, fieldName, true); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); + } + + { + MapperService mapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + searchInferenceId, + new LegacySemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null) + ); + assertSemanticTextField(mapperService, fieldName, true); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId); + } + } + + public void testUpdateModelSettings() throws IOException { + for (int depth = 1; depth < 5; depth++) { + String fieldName = randomFieldName(depth); + MapperService mapperService = createMapperService( + mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()) + ); + assertSemanticTextField(mapperService, fieldName, false); + { + Exception exc = expectThrows( + MapperParsingException.class, + () -> merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", "test_model") + .startObject("model_settings") + .field("inference_id", "test_model") + .endObject() + .endObject() + ) + ) + ); + assertThat(exc.getMessage(), containsString("Required [task_type]")); + } + { + merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", "test_model") + .startObject("model_settings") + .field("task_type", "sparse_embedding") + .endObject() + .endObject() + ) + ); + assertSemanticTextField(mapperService, fieldName, true); + } + { + merge( + mapperService, + mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()) + ); + assertSemanticTextField(mapperService, fieldName, true); + } + { + Exception exc = expectThrows( + IllegalArgumentException.class, + () -> merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", "test_model") + .startObject("model_settings") + .field("task_type", "text_embedding") + .field("dimensions", 10) + .field("similarity", "cosine") + .field("element_type", "float") + .endObject() + .endObject() + ) + ) + ); + assertThat( + exc.getMessage(), + containsString( + "Cannot update parameter [model_settings] " + + "from [task_type=sparse_embedding] " + + "to [task_type=text_embedding, dimensions=10, similarity=cosine, element_type=float]" + ) + ); + } + } + } + + public void testUpdateSearchInferenceId() throws IOException { + final String inferenceId = "test_inference_id"; + final String searchInferenceId1 = "test_search_inference_id_1"; + final String searchInferenceId2 = "test_search_inference_id_2"; + + CheckedBiFunction buildMapping = (f, sid) -> mapping(b -> { + b.startObject(f).field("type", "semantic_text").field("inference_id", inferenceId); + if (sid != null) { + b.field("search_inference_id", sid); + } + b.endObject(); + }); + + for (int depth = 1; depth < 5; depth++) { + String fieldName = randomFieldName(depth); + MapperService mapperService = createMapperService(buildMapping.apply(fieldName, null)); + assertSemanticTextField(mapperService, fieldName, false); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); + + merge(mapperService, buildMapping.apply(fieldName, searchInferenceId1)); + assertSemanticTextField(mapperService, 
fieldName, false); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId1); + + merge(mapperService, buildMapping.apply(fieldName, searchInferenceId2)); + assertSemanticTextField(mapperService, fieldName, false); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId2); + + merge(mapperService, buildMapping.apply(fieldName, null)); + assertSemanticTextField(mapperService, fieldName, false); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); + + mapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + new LegacySemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null) + ); + assertSemanticTextField(mapperService, fieldName, true); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); + + merge(mapperService, buildMapping.apply(fieldName, searchInferenceId1)); + assertSemanticTextField(mapperService, fieldName, true); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId1); + + merge(mapperService, buildMapping.apply(fieldName, searchInferenceId2)); + assertSemanticTextField(mapperService, fieldName, true); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId2); + + merge(mapperService, buildMapping.apply(fieldName, null)); + assertSemanticTextField(mapperService, fieldName, true); + assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); + } + } + + private static void assertSemanticTextField(MapperService mapperService, String fieldName, boolean expectedModelSettings) { + Mapper mapper = mapperService.mappingLookup().getMapper(fieldName); + assertNotNull(mapper); + assertThat(mapper, instanceOf(LegacySemanticTextFieldMapper.class)); + LegacySemanticTextFieldMapper semanticFieldMapper = (LegacySemanticTextFieldMapper) mapper; + + var fieldType = mapperService.fieldType(fieldName); + assertNotNull(fieldType); + assertThat(fieldType, instanceOf(LegacySemanticTextFieldMapper.LegacySemanticTextFieldType.class)); + LegacySemanticTextFieldMapper.LegacySemanticTextFieldType semanticTextFieldType = + (LegacySemanticTextFieldMapper.LegacySemanticTextFieldType) fieldType; + assertTrue(semanticFieldMapper.fieldType() == semanticTextFieldType); + + NestedObjectMapper chunksMapper = mapperService.mappingLookup() + .nestedLookup() + .getNestedMappers() + .get(getChunksFieldName(fieldName)); + assertThat(chunksMapper, equalTo(semanticFieldMapper.fieldType().getChunksField())); + assertThat(chunksMapper.fullPath(), equalTo(getChunksFieldName(fieldName))); + Mapper textMapper = chunksMapper.getMapper(CHUNKED_TEXT_FIELD); + assertNotNull(textMapper); + assertThat(textMapper, instanceOf(KeywordFieldMapper.class)); + KeywordFieldMapper textFieldMapper = (KeywordFieldMapper) textMapper; + assertFalse(textFieldMapper.fieldType().isIndexed()); + assertFalse(textFieldMapper.fieldType().hasDocValues()); + if (expectedModelSettings) { + assertNotNull(semanticFieldMapper.fieldType().getModelSettings()); + Mapper embeddingsMapper = chunksMapper.getMapper(CHUNKED_EMBEDDINGS_FIELD); + assertNotNull(embeddingsMapper); + assertThat(embeddingsMapper, instanceOf(FieldMapper.class)); + FieldMapper embeddingsFieldMapper = (FieldMapper) embeddingsMapper; + assertTrue(embeddingsFieldMapper.fieldType() == mapperService.mappingLookup().getFieldType(getEmbeddingsFieldName(fieldName))); + assertThat(embeddingsMapper.fullPath(), equalTo(getEmbeddingsFieldName(fieldName))); + switch 
(semanticFieldMapper.fieldType().getModelSettings().taskType()) { + case SPARSE_EMBEDDING -> assertThat(embeddingsMapper, instanceOf(SparseVectorFieldMapper.class)); + case TEXT_EMBEDDING -> assertThat(embeddingsMapper, instanceOf(DenseVectorFieldMapper.class)); + default -> throw new AssertionError("Invalid task type"); + } + } else { + assertNull(semanticFieldMapper.fieldType().getModelSettings()); + } + } + + private static void assertInferenceEndpoints( + MapperService mapperService, + String fieldName, + String expectedInferenceId, + String expectedSearchInferenceId + ) { + var fieldType = mapperService.fieldType(fieldName); + assertNotNull(fieldType); + assertThat(fieldType, instanceOf(LegacySemanticTextFieldMapper.LegacySemanticTextFieldType.class)); + LegacySemanticTextFieldMapper.LegacySemanticTextFieldType semanticTextFieldType = + (LegacySemanticTextFieldMapper.LegacySemanticTextFieldType) fieldType; + assertEquals(expectedInferenceId, semanticTextFieldType.getInferenceId()); + assertEquals(expectedSearchInferenceId, semanticTextFieldType.getSearchInferenceId()); + } + + public void testSuccessfulParse() throws IOException { + for (int depth = 1; depth < 4; depth++) { + final String fieldName1 = randomFieldName(depth); + final String fieldName2 = randomFieldName(depth + 1); + final String searchInferenceId = randomAlphaOfLength(8); + final boolean setSearchInferenceId = randomBoolean(); + + Model model1 = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING); + Model model2 = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING); + XContentBuilder mapping = mapping(b -> { + addSemanticTextMapping(b, fieldName1, model1.getInferenceEntityId(), setSearchInferenceId ? searchInferenceId : null); + addSemanticTextMapping(b, fieldName2, model2.getInferenceEntityId(), setSearchInferenceId ? searchInferenceId : null); + }); + + MapperService mapperService = createMapperService(mapping); + assertSemanticTextField(mapperService, fieldName1, false); + assertInferenceEndpoints( + mapperService, + fieldName1, + model1.getInferenceEntityId(), + setSearchInferenceId ? searchInferenceId : model1.getInferenceEntityId() + ); + assertSemanticTextField(mapperService, fieldName2, false); + assertInferenceEndpoints( + mapperService, + fieldName2, + model2.getInferenceEntityId(), + setSearchInferenceId ? 
searchInferenceId : model2.getInferenceEntityId() + ); + + DocumentMapper documentMapper = mapperService.documentMapper(); + ParsedDocument doc = documentMapper.parse( + source( + b -> addSemanticTextInferenceResults( + b, + List.of( + randomSemanticText(fieldName1, model1, List.of("a b", "c"), XContentType.JSON), + randomSemanticText(fieldName2, model2, List.of("d e f"), XContentType.JSON) + ) + ) + ) + ); + + List luceneDocs = doc.docs(); + assertEquals(4, luceneDocs.size()); + for (int i = 0; i < 3; i++) { + assertEquals(doc.rootDoc(), luceneDocs.get(i).getParent()); + } + // nested docs are in reversed order + assertSparseFeatures(luceneDocs.get(0), getEmbeddingsFieldName(fieldName1), 2); + assertSparseFeatures(luceneDocs.get(1), getEmbeddingsFieldName(fieldName1), 1); + assertSparseFeatures(luceneDocs.get(2), getEmbeddingsFieldName(fieldName2), 3); + assertEquals(doc.rootDoc(), luceneDocs.get(3)); + assertNull(luceneDocs.get(3).getParent()); + + withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), reader -> { + NestedDocuments nested = new NestedDocuments( + mapperService.mappingLookup(), + QueryBitSetProducer::new, + IndexVersion.current() + ); + LeafNestedDocuments leaf = nested.getLeafNestedDocuments(reader.leaves().get(0)); + + Set visitedNestedIdentities = new HashSet<>(); + Set expectedVisitedNestedIdentities = Set.of( + new SearchHit.NestedIdentity(getChunksFieldName(fieldName1), 0, null), + new SearchHit.NestedIdentity(getChunksFieldName(fieldName1), 1, null), + new SearchHit.NestedIdentity(getChunksFieldName(fieldName2), 0, null) + ); + + assertChildLeafNestedDocument(leaf, 0, 3, visitedNestedIdentities); + assertChildLeafNestedDocument(leaf, 1, 3, visitedNestedIdentities); + assertChildLeafNestedDocument(leaf, 2, 3, visitedNestedIdentities); + assertEquals(expectedVisitedNestedIdentities, visitedNestedIdentities); + + assertNull(leaf.advance(3)); + assertEquals(3, leaf.doc()); + assertEquals(3, leaf.rootDoc()); + assertNull(leaf.nestedIdentity()); + + IndexSearcher searcher = newSearcher(reader); + { + TopDocs topDocs = searcher.search( + generateNestedTermSparseVectorQuery(mapperService.mappingLookup().nestedLookup(), fieldName1, List.of("a")), + 10 + ); + assertEquals(1, topDocs.totalHits.value()); + assertEquals(3, topDocs.scoreDocs[0].doc); + } + { + TopDocs topDocs = searcher.search( + generateNestedTermSparseVectorQuery(mapperService.mappingLookup().nestedLookup(), fieldName1, List.of("a", "b")), + 10 + ); + assertEquals(1, topDocs.totalHits.value()); + assertEquals(3, topDocs.scoreDocs[0].doc); + } + { + TopDocs topDocs = searcher.search( + generateNestedTermSparseVectorQuery(mapperService.mappingLookup().nestedLookup(), fieldName2, List.of("d")), + 10 + ); + assertEquals(1, topDocs.totalHits.value()); + assertEquals(3, topDocs.scoreDocs[0].doc); + } + { + TopDocs topDocs = searcher.search( + generateNestedTermSparseVectorQuery(mapperService.mappingLookup().nestedLookup(), fieldName2, List.of("z")), + 10 + ); + assertEquals(0, topDocs.totalHits.value()); + } + }); + } + } + + public void testMissingInferenceId() throws IOException { + DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); + IllegalArgumentException ex = expectThrows( + DocumentParsingException.class, + IllegalArgumentException.class, + () -> documentMapper.parse( + source( + b -> b.startObject("field") + .startObject(INFERENCE_FIELD) + .field(MODEL_SETTINGS_FIELD, new 
LegacySemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null)) + .field(CHUNKS_FIELD, List.of()) + .endObject() + .endObject() + ) + ) + ); + assertThat(ex.getCause().getMessage(), containsString("Required [inference_id]")); + } + + public void testMissingModelSettings() throws IOException { + DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); + IllegalArgumentException ex = expectThrows( + DocumentParsingException.class, + IllegalArgumentException.class, + () -> documentMapper.parse( + source(b -> b.startObject("field").startObject(INFERENCE_FIELD).field(INFERENCE_ID_FIELD, "my_id").endObject().endObject()) + ) + ); + assertThat(ex.getCause().getMessage(), containsString("Required [model_settings, chunks]")); + } + + public void testMissingTaskType() throws IOException { + DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); + IllegalArgumentException ex = expectThrows( + DocumentParsingException.class, + IllegalArgumentException.class, + () -> documentMapper.parse( + source( + b -> b.startObject("field") + .startObject(INFERENCE_FIELD) + .field(INFERENCE_ID_FIELD, "my_id") + .startObject(MODEL_SETTINGS_FIELD) + .endObject() + .endObject() + .endObject() + ) + ) + ); + assertThat(ex.getCause().getMessage(), containsString("failed to parse field [model_settings]")); + } + + public void testDenseVectorElementType() throws IOException { + final String fieldName = "field"; + final String inferenceId = "test_service"; + + BiConsumer assertMapperService = (m, e) -> { + Mapper mapper = m.mappingLookup().getMapper(fieldName); + assertThat(mapper, instanceOf(LegacySemanticTextFieldMapper.class)); + LegacySemanticTextFieldMapper semanticTextFieldMapper = (LegacySemanticTextFieldMapper) mapper; + assertThat(semanticTextFieldMapper.fieldType().getModelSettings().elementType(), equalTo(e)); + }; + + MapperService floatMapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + new LegacySemanticTextField.ModelSettings( + TaskType.TEXT_EMBEDDING, + 1024, + SimilarityMeasure.COSINE, + DenseVectorFieldMapper.ElementType.FLOAT + ) + ); + assertMapperService.accept(floatMapperService, DenseVectorFieldMapper.ElementType.FLOAT); + + MapperService byteMapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + new LegacySemanticTextField.ModelSettings( + TaskType.TEXT_EMBEDDING, + 1024, + SimilarityMeasure.COSINE, + DenseVectorFieldMapper.ElementType.BYTE + ) + ); + assertMapperService.accept(byteMapperService, DenseVectorFieldMapper.ElementType.BYTE); + } + + private MapperService mapperServiceForFieldWithModelSettings( + String fieldName, + String inferenceId, + LegacySemanticTextField.ModelSettings modelSettings + ) throws IOException { + return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings); + } + + private MapperService mapperServiceForFieldWithModelSettings( + String fieldName, + String inferenceId, + String searchInferenceId, + LegacySemanticTextField.ModelSettings modelSettings + ) throws IOException { + String mappingParams = "type=semantic_text,inference_id=" + inferenceId; + if (searchInferenceId != null) { + mappingParams += ",search_inference_id=" + searchInferenceId; + } + + MapperService mapperService = createMapperService(mapping(b -> {})); + mapperService.merge( + "_doc", + new 
CompressedXContent(Strings.toString(PutMappingRequest.simpleMapping(fieldName, mappingParams))), + MapperService.MergeReason.MAPPING_UPDATE + ); + + LegacySemanticTextField semanticTextField = new LegacySemanticTextField( + fieldName, + List.of(), + new LegacySemanticTextField.InferenceResult(inferenceId, modelSettings, List.of()), + XContentType.JSON + ); + XContentBuilder builder = JsonXContent.contentBuilder().startObject(); + builder.field(semanticTextField.fieldName()); + builder.value(semanticTextField); + builder.endObject(); + + SourceToParse sourceToParse = new SourceToParse("test", BytesReference.bytes(builder), XContentType.JSON); + ParsedDocument parsedDocument = mapperService.documentMapper().parse(sourceToParse); + mapperService.merge( + "_doc", + parsedDocument.dynamicMappingsUpdate().toCompressedXContent(), + MapperService.MergeReason.MAPPING_UPDATE + ); + return mapperService; + } + + public void testExistsQuerySparseVector() throws IOException { + final String fieldName = "semantic"; + final String inferenceId = "test_service"; + + MapperService mapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + new LegacySemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null) + ); + + Mapper mapper = mapperService.mappingLookup().getMapper(fieldName); + assertNotNull(mapper); + SearchExecutionContext searchExecutionContext = createSearchExecutionContext(mapperService); + Query existsQuery = ((LegacySemanticTextFieldMapper) mapper).fieldType().existsQuery(searchExecutionContext); + assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class)); + } + + public void testExistsQueryDenseVector() throws IOException { + final String fieldName = "semantic"; + final String inferenceId = "test_service"; + + MapperService mapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + new LegacySemanticTextField.ModelSettings( + TaskType.TEXT_EMBEDDING, + 1024, + SimilarityMeasure.COSINE, + DenseVectorFieldMapper.ElementType.FLOAT + ) + ); + + Mapper mapper = mapperService.mappingLookup().getMapper(fieldName); + assertNotNull(mapper); + SearchExecutionContext searchExecutionContext = createSearchExecutionContext(mapperService); + Query existsQuery = ((LegacySemanticTextFieldMapper) mapper).fieldType().existsQuery(searchExecutionContext); + assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class)); + } + + public void testInsertValueMapTraversal() throws IOException { + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("test", "value").endObject(); + + Map map = toSourceMap(Strings.toString(builder)); + LegacySemanticTextFieldMapper.insertValue("test", map, "value2"); + assertThat(getMapValue(map, "test"), equalTo("value2")); + LegacySemanticTextFieldMapper.insertValue("something.else", map, "something_else_value"); + assertThat(getMapValue(map, "something\\.else"), equalTo("something_else_value")); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + builder.startObject("path1").startObject("path2").field("test", "value").endObject().endObject(); + builder.endObject(); + + Map map = toSourceMap(Strings.toString(builder)); + LegacySemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); + assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); + LegacySemanticTextFieldMapper.insertValue("path1.path2.test_me", map, "test_me_value"); + assertThat(getMapValue(map, "path1.path2.test_me"), equalTo("test_me_value")); + 
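+            // When an intermediate object ("non_path2") does not exist, insertValue falls back to
+            // storing the remainder of the path as a literal flat key, which is why the lookup
+            // below escapes the dot ("non_path2\\.test").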
LegacySemanticTextFieldMapper.insertValue("path1.non_path2.test", map, "test_value"); + assertThat(getMapValue(map, "path1.non_path2\\.test"), equalTo("test_value")); + + LegacySemanticTextFieldMapper.insertValue("path1.path2", map, Map.of("path3", "bar")); + assertThat(getMapValue(map, "path1.path2"), equalTo(Map.of("path3", "bar"))); + + LegacySemanticTextFieldMapper.insertValue("path1", map, "baz"); + assertThat(getMapValue(map, "path1"), equalTo("baz")); + + LegacySemanticTextFieldMapper.insertValue("path3.path4", map, Map.of("test", "foo")); + assertThat(getMapValue(map, "path3\\.path4"), equalTo(Map.of("test", "foo"))); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + builder.startObject("path1").array("test", "value1", "value2").endObject(); + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + LegacySemanticTextFieldMapper.insertValue("path1.test", map, List.of("value3", "value4", "value5")); + assertThat(getMapValue(map, "path1.test"), equalTo(List.of("value3", "value4", "value5"))); + + LegacySemanticTextFieldMapper.insertValue("path2.test", map, List.of("value6", "value7", "value8")); + assertThat(getMapValue(map, "path2\\.test"), equalTo(List.of("value6", "value7", "value8"))); + } + } + + public void testInsertValueListTraversal() throws IOException { + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1"); + { + builder.startArray("path2"); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + } + builder.endObject(); + } + { + builder.startObject("path3"); + { + builder.startArray("path4"); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + LegacySemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); + assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); + LegacySemanticTextFieldMapper.insertValue("path1.path2.test2", map, "value3"); + assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); + assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(Map.of("test", "value2", "test2", "value3")))); + + LegacySemanticTextFieldMapper.insertValue("path3.path4.test", map, "value4"); + assertThat(getMapValue(map, "path3.path4.test"), equalTo("value4")); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1"); + { + builder.startArray("path2"); + builder.startArray(); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + LegacySemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); + assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); + LegacySemanticTextFieldMapper.insertValue("path1.path2.test2", map, "value3"); + assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); + assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(List.of(Map.of("test", "value2", "test2", "value3"))))); + } + } + + public void testInsertValueFieldsWithDots() throws IOException { + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("xxx.yyy", "value1").endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + LegacySemanticTextFieldMapper.insertValue("xxx.yyy", map, 
"value2"); + assertThat(getMapValue(map, "xxx\\.yyy"), equalTo("value2")); + + LegacySemanticTextFieldMapper.insertValue("xxx", map, "value3"); + assertThat(getMapValue(map, "xxx"), equalTo("value3")); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1.path2"); + { + builder.startObject("path3.path4"); + builder.field("test", "value1"); + builder.endObject(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + LegacySemanticTextFieldMapper.insertValue("path1.path2.path3.path4.test", map, "value2"); + assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test"), equalTo("value2")); + + LegacySemanticTextFieldMapper.insertValue("path1.path2.path3.path4.test2", map, "value3"); + assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test2"), equalTo("value3")); + assertThat(getMapValue(map, "path1\\.path2.path3\\.path4"), equalTo(Map.of("test", "value2", "test2", "value3"))); + } + } + + public void testInsertValueAmbiguousPath() throws IOException { + // Mixed dotted object notation + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1.path2"); + { + builder.startObject("path3"); + builder.field("test1", "value1"); + builder.endObject(); + } + builder.endObject(); + } + { + builder.startObject("path1"); + { + builder.startObject("path2.path3"); + builder.field("test2", "value2"); + builder.endObject(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); + + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> LegacySemanticTextFieldMapper.insertValue("path1.path2.path3.test1", map, "value3") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + ex = assertThrows( + IllegalArgumentException.class, + () -> LegacySemanticTextFieldMapper.insertValue("path1.path2.path3.test3", map, "value4") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + assertThat(map, equalTo(originalMap)); + } + + // traversal through lists + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1.path2"); + { + builder.startArray("path3"); + builder.startObject().field("test1", "value1").endObject(); + builder.endArray(); + } + builder.endObject(); + } + { + builder.startObject("path1"); + { + builder.startArray("path2.path3"); + builder.startObject().field("test2", "value2").endObject(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); + + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> LegacySemanticTextFieldMapper.insertValue("path1.path2.path3.test1", map, "value3") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + ex = assertThrows( + IllegalArgumentException.class, + () -> LegacySemanticTextFieldMapper.insertValue("path1.path2.path3.test3", map, "value4") + ); + assertThat( + 
ex.getMessage(),
+                equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use")
+            );
+
+            assertThat(map, equalTo(originalMap));
+        }
+    }
+
+    public void testInsertValueCannotTraversePath() throws IOException {
+        XContentBuilder builder = XContentFactory.jsonBuilder().startObject();
+        {
+            builder.startObject("path1");
+            {
+                builder.startArray("path2");
+                builder.startArray();
+                builder.startObject().field("test", "value1").endObject();
+                builder.endArray();
+                builder.endArray();
+            }
+            builder.endObject();
+        }
+        builder.endObject();
+        Map<String, Object> map = toSourceMap(Strings.toString(builder));
+        final Map<String, Object> originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder)));
+
+        IllegalArgumentException ex = assertThrows(
+            IllegalArgumentException.class,
+            () -> LegacySemanticTextFieldMapper.insertValue("path1.path2.test.test2", map, "value2")
+        );
+        assertThat(
+            ex.getMessage(),
+            equalTo("Path [path1.path2.test] has value [value1] of type [String], which cannot be traversed into further")
+        );
+
+        assertThat(map, equalTo(originalMap));
+    }
+
+    @Override
+    protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) {
+        // Until a doc is indexed, the query is rewritten as match no docs
+        assertThat(query, instanceOf(MatchNoDocsQuery.class));
+    }
+
+    private static void addSemanticTextMapping(
+        XContentBuilder mappingBuilder,
+        String fieldName,
+        String inferenceId,
+        String searchInferenceId
+    ) throws IOException {
+        mappingBuilder.startObject(fieldName);
+        mappingBuilder.field("type", LegacySemanticTextFieldMapper.CONTENT_TYPE);
+        mappingBuilder.field("inference_id", inferenceId);
+        if (searchInferenceId != null) {
+            mappingBuilder.field("search_inference_id", searchInferenceId);
+        }
+        mappingBuilder.endObject();
+    }
+
+    private static void addSemanticTextInferenceResults(
+        XContentBuilder sourceBuilder,
+        List<LegacySemanticTextField> semanticTextInferenceResults
+    ) throws IOException {
+        for (var field : semanticTextInferenceResults) {
+            sourceBuilder.field(field.fieldName());
+            sourceBuilder.value(field);
+        }
+    }
+
+    static String randomFieldName(int numLevel) {
+        StringBuilder builder = new StringBuilder();
+        for (int i = 0; i < numLevel; i++) {
+            if (i > 0) {
+                builder.append('.');
+            }
+            builder.append(randomAlphaOfLengthBetween(5, 15));
+        }
+        return builder.toString();
+    }
+
+    private static Query generateNestedTermSparseVectorQuery(NestedLookup nestedLookup, String fieldName, List<String> tokens) {
+        NestedObjectMapper mapper = nestedLookup.getNestedMappers().get(getChunksFieldName(fieldName));
+        assertNotNull(mapper);
+
+        BitSetProducer parentFilter = new QueryBitSetProducer(Queries.newNonNestedFilter(IndexVersion.current()));
+        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+        for (String token : tokens) {
+            queryBuilder.add(
+                new BooleanClause(new TermQuery(new Term(getEmbeddingsFieldName(fieldName), token)), BooleanClause.Occur.MUST)
+            );
+        }
+        queryBuilder.add(new BooleanClause(mapper.nestedTypeFilter(), BooleanClause.Occur.FILTER));
+
+        return new ESToParentBlockJoinQuery(queryBuilder.build(), parentFilter, ScoreMode.Total, null);
+    }
+
+    private static void assertChildLeafNestedDocument(
+        LeafNestedDocuments leaf,
+        int advanceToDoc,
+        int expectedRootDoc,
+        Set<SearchHit.NestedIdentity> visitedNestedIdentities
+    ) throws IOException {
+
+        assertNotNull(leaf.advance(advanceToDoc));
+        assertEquals(advanceToDoc, leaf.doc());
+        assertEquals(expectedRootDoc, leaf.rootDoc());
+        assertNotNull(leaf.nestedIdentity());
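+        // Record the nested identity that was visited so callers can assert the complete set.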
+        visitedNestedIdentities.add(leaf.nestedIdentity());
+    }
+
+    private static void assertSparseFeatures(LuceneDocument doc, String fieldName, int expectedCount) {
+        int count = 0;
+        for (IndexableField field : doc.getFields()) {
+            if (field instanceof FeatureField featureField) {
+                assertThat(featureField.name(), equalTo(fieldName));
+                ++count;
+            }
+        }
+        assertThat(count, equalTo(expectedCount));
+    }
+
+    private Map<String, Object> toSourceMap(String source) throws IOException {
+        try (XContentParser parser = createParser(JsonXContent.jsonXContent, source)) {
+            return parser.map();
+        }
+    }
+
+    private static Object getMapValue(Map<String, Object> map, String key) {
+        // Split the path on unescaped "." chars and then unescape the escaped "." chars
+        final String[] pathElements = Arrays.stream(key.split("(?<!\\\\)\\.")).map(k -> k.replace("\\.", ".")).toArray(String[]::new);
+
+        Object value = null;
+        Object nextLayer = map;
+        for (int i = 0; i < pathElements.length; i++) {
+            if (nextLayer instanceof Map<?, ?> nextMap) {
+                value = nextMap.get(pathElements[i]);
+            } else if (nextLayer instanceof List<?> nextList) {
+                final String pathElement = pathElements[i];
+                List<Object> values = nextList.stream().flatMap(v -> {
+                    Stream.Builder<Object> streamBuilder = Stream.builder();
+                    if (v instanceof List<?> innerList) {
+                        traverseList(innerList, streamBuilder);
+                    } else {
+                        streamBuilder.add(v);
+                    }
+                    return streamBuilder.build();
+                }).filter(v -> v instanceof Map).map(v -> ((Map<?, ?>) v).get(pathElement)).filter(Objects::nonNull).toList();
+
+                if (values.isEmpty()) {
+                    return null;
+                } else if (values.size() > 1) {
+                    throw new AssertionError("List " + nextList + " contains multiple values for [" + pathElement + "]");
+                } else {
+                    value = values.getFirst();
+                }
+            } else if (nextLayer == null) {
+                break;
+            } else {
+                throw new AssertionError(
+                    "Path ["
+                        + String.join(".", Arrays.copyOfRange(pathElements, 0, i))
+                        + "] has value ["
+                        + value
+                        + "] of type ["
+                        + value.getClass().getSimpleName()
+                        + "], which cannot be traversed into further"
+                );
+            }
+
+            nextLayer = value;
+        }
+
+        return value;
+    }
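+    // Illustrative note on the escaping convention used by getMapValue: a key such as
+    // "path1\\.path2.test" resolves the literal map key "path1.path2" and then "test", whereas
+    // "path1.path2.test" traverses three nested levels.
+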
+    private static void traverseList(List<?> list, Stream.Builder<Object> streamBuilder) {
+        for (Object value : list) {
+            if (value instanceof List<?> innerList) {
+                traverseList(innerList, streamBuilder);
+            } else {
+                streamBuilder.add(value);
+            }
+        }
+    }
+}
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldTests.java
new file mode 100644
index 0000000000000..f7a88865c58cd
--- /dev/null
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/LegacySemanticTextFieldTests.java
@@ -0,0 +1,292 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.mapper;
+
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
+import org.elasticsearch.inference.ChunkedInferenceServiceResults;
+import org.elasticsearch.inference.Model;
+import org.elasticsearch.inference.SimilarityMeasure;
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.test.AbstractXContentTestCase;
+import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xcontent.XContentParserConfiguration;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults;
+import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults;
+import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults;
+import org.elasticsearch.xpack.core.ml.search.WeightedToken;
+import org.elasticsearch.xpack.core.utils.FloatConversionUtils;
+import org.elasticsearch.xpack.inference.model.TestModel;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Predicate;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
+public class LegacySemanticTextFieldTests extends AbstractXContentTestCase<LegacySemanticTextField> {
+    private static final String NAME = "field";
+
+    @Override
+    protected Predicate<String> getRandomFieldsExcludeFilter() {
+        return n -> n.endsWith(LegacySemanticTextField.CHUNKED_EMBEDDINGS_FIELD);
+    }
+
+    @Override
+    protected void assertEqualInstances(LegacySemanticTextField expectedInstance, LegacySemanticTextField newInstance) {
+        assertThat(newInstance.fieldName(), equalTo(expectedInstance.fieldName()));
+        assertThat(newInstance.inference().modelSettings(), equalTo(expectedInstance.inference().modelSettings()));
+        assertThat(newInstance.inference().chunks().size(), equalTo(expectedInstance.inference().chunks().size()));
+        LegacySemanticTextField.ModelSettings modelSettings = newInstance.inference().modelSettings();
+        for (int i = 0; i < newInstance.inference().chunks().size(); i++) {
+            assertThat(newInstance.inference().chunks().get(i).text(), equalTo(expectedInstance.inference().chunks().get(i).text()));
+            switch (modelSettings.taskType()) {
+                case TEXT_EMBEDDING -> {
+                    double[] expectedVector = parseDenseVector(
+                        expectedInstance.inference().chunks().get(i).rawEmbeddings(),
+                        modelSettings.dimensions(),
+                        expectedInstance.contentType()
+                    );
+                    double[] newVector = parseDenseVector(
+                        newInstance.inference().chunks().get(i).rawEmbeddings(),
+                        modelSettings.dimensions(),
+                        newInstance.contentType()
+                    );
+                    assertArrayEquals(expectedVector, newVector, 0.0000001f);
+                }
+                case SPARSE_EMBEDDING -> {
+                    List<WeightedToken> expectedTokens = parseWeightedTokens(
+                        expectedInstance.inference().chunks().get(i).rawEmbeddings(),
+                        expectedInstance.contentType()
+                    );
+                    List<WeightedToken> newTokens = parseWeightedTokens(
+                        newInstance.inference().chunks().get(i).rawEmbeddings(),
+                        newInstance.contentType()
+                    );
+                    assertThat(newTokens, equalTo(expectedTokens));
+                }
+                default -> throw new AssertionError("Invalid task type " + modelSettings.taskType());
+            }
+        }
+    }
+
+    @Override
+    protected LegacySemanticTextField createTestInstance() {
+        List<String> rawValues = randomList(1, 5, () -> randomSemanticTextInput().toString());
+        try { // try catch required for override
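+            // randomSemanticText (defined below in this class) pairs each raw input with
+            // embeddings matching the random model's task type, mirroring real chunked inference.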
+ return randomSemanticText(NAME, TestModel.createRandomInstance(), rawValues, randomFrom(XContentType.values())); + } catch (IOException e) { + fail("Failed to create random LegacySemanticTextField instance"); + } + return null; + } + + @Override + protected LegacySemanticTextField doParseInstance(XContentParser parser) throws IOException { + return LegacySemanticTextField.parse(parser, new Tuple<>(NAME, parser.contentType())); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } + + public void testModelSettingsValidation() { + NullPointerException npe = expectThrows(NullPointerException.class, () -> { + new LegacySemanticTextField.ModelSettings(null, 10, SimilarityMeasure.COSINE, DenseVectorFieldMapper.ElementType.FLOAT); + }); + assertThat(npe.getMessage(), equalTo("task type must not be null")); + + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> { + new LegacySemanticTextField.ModelSettings( + TaskType.COMPLETION, + 10, + SimilarityMeasure.COSINE, + DenseVectorFieldMapper.ElementType.FLOAT + ); + }); + assertThat(ex.getMessage(), containsString("Wrong [task_type]")); + + ex = expectThrows(IllegalArgumentException.class, () -> { + new LegacySemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, 10, null, null); + }); + assertThat(ex.getMessage(), containsString("[dimensions] is not allowed")); + + ex = expectThrows(IllegalArgumentException.class, () -> { + new LegacySemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, SimilarityMeasure.COSINE, null); + }); + assertThat(ex.getMessage(), containsString("[similarity] is not allowed")); + + ex = expectThrows(IllegalArgumentException.class, () -> { + new LegacySemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, DenseVectorFieldMapper.ElementType.FLOAT); + }); + assertThat(ex.getMessage(), containsString("[element_type] is not allowed")); + + ex = expectThrows(IllegalArgumentException.class, () -> { + new LegacySemanticTextField.ModelSettings( + TaskType.TEXT_EMBEDDING, + null, + SimilarityMeasure.COSINE, + DenseVectorFieldMapper.ElementType.FLOAT + ); + }); + assertThat(ex.getMessage(), containsString("required [dimensions] field is missing")); + + ex = expectThrows(IllegalArgumentException.class, () -> { + new LegacySemanticTextField.ModelSettings(TaskType.TEXT_EMBEDDING, 10, null, DenseVectorFieldMapper.ElementType.FLOAT); + }); + assertThat(ex.getMessage(), containsString("required [similarity] field is missing")); + + ex = expectThrows(IllegalArgumentException.class, () -> { + new LegacySemanticTextField.ModelSettings(TaskType.TEXT_EMBEDDING, 10, SimilarityMeasure.COSINE, null); + }); + assertThat(ex.getMessage(), containsString("required [element_type] field is missing")); + } + + public static InferenceChunkedTextEmbeddingFloatResults randomInferenceChunkedTextEmbeddingFloatResults( + Model model, + List inputs + ) throws IOException { + List chunks = new ArrayList<>(); + for (String input : inputs) { + float[] values = new float[model.getServiceSettings().dimensions()]; + for (int j = 0; j < values.length; j++) { + values[j] = (float) randomDouble(); + } + chunks.add(new InferenceChunkedTextEmbeddingFloatResults.InferenceFloatEmbeddingChunk(input, values)); + } + return new InferenceChunkedTextEmbeddingFloatResults(chunks); + } + + public static InferenceChunkedSparseEmbeddingResults randomSparseEmbeddings(List inputs) { + List chunks = new ArrayList<>(); + for (String input : inputs) { + var tokens = new ArrayList(); + for (var token : 
input.split("\\s+")) { + tokens.add(new WeightedToken(token, randomFloat())); + } + chunks.add(new MlChunkedTextExpansionResults.ChunkedResult(input, tokens)); + } + return new InferenceChunkedSparseEmbeddingResults(chunks); + } + + public static LegacySemanticTextField randomSemanticText(String fieldName, Model model, List inputs, XContentType contentType) + throws IOException { + ChunkedInferenceServiceResults results = switch (model.getTaskType()) { + case TEXT_EMBEDDING -> randomInferenceChunkedTextEmbeddingFloatResults(model, inputs); + case SPARSE_EMBEDDING -> randomSparseEmbeddings(inputs); + default -> throw new AssertionError("invalid task type: " + model.getTaskType().name()); + }; + return semanticTextFieldFromChunkedInferenceResults(fieldName, model, inputs, results, contentType); + } + + public static LegacySemanticTextField semanticTextFieldFromChunkedInferenceResults( + String fieldName, + Model model, + List inputs, + ChunkedInferenceServiceResults results, + XContentType contentType + ) { + return new LegacySemanticTextField( + fieldName, + inputs, + new LegacySemanticTextField.InferenceResult( + model.getInferenceEntityId(), + new LegacySemanticTextField.ModelSettings(model), + LegacySemanticTextField.toSemanticTextFieldChunks(List.of(results), contentType) + ), + contentType + ); + } + + /** + * Returns a randomly generated object for Semantic Text tests purpose. + */ + public static Object randomSemanticTextInput() { + if (rarely()) { + return switch (randomIntBetween(0, 4)) { + case 0 -> randomInt(); + case 1 -> randomLong(); + case 2 -> randomFloat(); + case 3 -> randomBoolean(); + case 4 -> randomDouble(); + default -> throw new IllegalStateException("Illegal state while generating random semantic text input"); + }; + } else { + return randomAlphaOfLengthBetween(10, 20); + } + } + + public static ChunkedInferenceServiceResults toChunkedResult(LegacySemanticTextField field) throws IOException { + switch (field.inference().modelSettings().taskType()) { + case SPARSE_EMBEDDING -> { + List chunks = new ArrayList<>(); + for (var chunk : field.inference().chunks()) { + var tokens = parseWeightedTokens(chunk.rawEmbeddings(), field.contentType()); + // TODO + chunks.add(new MlChunkedTextExpansionResults.ChunkedResult(null, tokens)); + } + return new InferenceChunkedSparseEmbeddingResults(chunks); + } + case TEXT_EMBEDDING -> { + List chunks = new ArrayList<>(); + for (var chunk : field.inference().chunks()) { + double[] values = parseDenseVector( + chunk.rawEmbeddings(), + field.inference().modelSettings().dimensions(), + field.contentType() + ); + // TODO + chunks.add( + new InferenceChunkedTextEmbeddingFloatResults.InferenceFloatEmbeddingChunk( + null, + FloatConversionUtils.floatArrayOf(values) + ) + ); + } + return new InferenceChunkedTextEmbeddingFloatResults(chunks); + } + default -> throw new AssertionError("Invalid task_type: " + field.inference().modelSettings().taskType().name()); + } + } + + private static double[] parseDenseVector(BytesReference value, int numDims, XContentType contentType) { + try (XContentParser parser = XContentHelper.createParserNotCompressed(XContentParserConfiguration.EMPTY, value, contentType)) { + parser.nextToken(); + assertThat(parser.currentToken(), equalTo(XContentParser.Token.START_ARRAY)); + double[] values = new double[numDims]; + for (int i = 0; i < numDims; i++) { + assertThat(parser.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); + values[i] = parser.doubleValue(); + } + assertThat(parser.nextToken(), 
equalTo(XContentParser.Token.END_ARRAY)); + return values; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static List parseWeightedTokens(BytesReference value, XContentType contentType) { + try (XContentParser parser = XContentHelper.createParserNotCompressed(XContentParserConfiguration.EMPTY, value, contentType)) { + Map map = parser.map(); + List weightedTokens = new ArrayList<>(); + for (var entry : map.entrySet()) { + weightedTokens.add(new WeightedToken(entry.getKey(), ((Number) entry.getValue()).floatValue())); + } + return weightedTokens; + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 71ff9fc7d84cf..ea948ec65ad82 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.join.QueryBitSetProducer; import org.apache.lucene.search.join.ScoreMode; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; +import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.common.CheckedBiFunction; import org.elasticsearch.common.Strings; @@ -31,10 +32,10 @@ import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; @@ -56,8 +57,8 @@ import org.elasticsearch.search.LeafNestedDocuments; import org.elasticsearch.search.NestedDocuments; import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; @@ -68,7 +69,6 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -79,9 +79,7 @@ import static java.util.Collections.singletonList; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_TEXT_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKS_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.INFERENCE_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.INFERENCE_ID_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.MODEL_SETTINGS_FIELD; import static 
org.elasticsearch.xpack.inference.mapper.SemanticTextField.SEARCH_INFERENCE_ID_FIELD; @@ -99,6 +97,21 @@ protected Collection getPlugins() { return singletonList(new InferencePlugin(Settings.EMPTY)); } + @Override + protected Settings getIndexSettings() { + return Settings.builder() + .put( + IndexMetadata.SETTING_VERSION_CREATED, + IndexVersionUtils.randomVersionBetween(random(), IndexVersions.INFERENCE_METADATA_FIELDS, IndexVersion.current()) + ) + .build(); + } + + @Override + protected IndexVersion getVersion() { + return IndexVersionUtils.randomVersionBetween(random(), IndexVersions.INFERENCE_METADATA_FIELDS, IndexVersion.current()); + } + @Override protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "semantic_text"); @@ -151,15 +164,7 @@ protected IngestScriptSupport ingestScriptSupport() { @Override public MappedFieldType getMappedFieldType() { - return new SemanticTextFieldMapper.SemanticTextFieldType( - "field", - "fake-inference-id", - null, - null, - null, - IndexVersion.current(), - Map.of() - ); + return new SemanticTextFieldMapper.SemanticTextFieldType("field", "fake-inference-id", null, null, null, Map.of()); } @Override @@ -470,12 +475,6 @@ private static void assertSemanticTextField(MapperService mapperService, String .get(getChunksFieldName(fieldName)); assertThat(chunksMapper, equalTo(semanticFieldMapper.fieldType().getChunksField())); assertThat(chunksMapper.fullPath(), equalTo(getChunksFieldName(fieldName))); - Mapper textMapper = chunksMapper.getMapper(CHUNKED_TEXT_FIELD); - assertNotNull(textMapper); - assertThat(textMapper, instanceOf(KeywordFieldMapper.class)); - KeywordFieldMapper textFieldMapper = (KeywordFieldMapper) textMapper; - assertFalse(textFieldMapper.fieldType().isIndexed()); - assertFalse(textFieldMapper.fieldType().hasDocValues()); if (expectedModelSettings) { assertNotNull(semanticFieldMapper.fieldType().getModelSettings()); Mapper embeddingsMapper = chunksMapper.getMapper(CHUNKED_EMBEDDINGS_FIELD); @@ -632,11 +631,9 @@ public void testMissingInferenceId() throws IOException { () -> documentMapper.parse( source( b -> b.startObject("field") - .startObject(INFERENCE_FIELD) .field(MODEL_SETTINGS_FIELD, new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null)) .field(CHUNKS_FIELD, List.of()) .endObject() - .endObject() ) ) ); @@ -648,9 +645,7 @@ public void testMissingModelSettings() throws IOException { IllegalArgumentException ex = expectThrows( DocumentParsingException.class, IllegalArgumentException.class, - () -> documentMapper.parse( - source(b -> b.startObject("field").startObject(INFERENCE_FIELD).field(INFERENCE_ID_FIELD, "my_id").endObject().endObject()) - ) + () -> documentMapper.parse(source(b -> b.startObject("field").field(INFERENCE_ID_FIELD, "my_id").endObject())) ); assertThat(ex.getCause().getMessage(), containsString("Required [model_settings, chunks]")); } @@ -662,13 +657,7 @@ public void testMissingTaskType() throws IOException { IllegalArgumentException.class, () -> documentMapper.parse( source( - b -> b.startObject("field") - .startObject(INFERENCE_FIELD) - .field(INFERENCE_ID_FIELD, "my_id") - .startObject(MODEL_SETTINGS_FIELD) - .endObject() - .endObject() - .endObject() + b -> b.startObject("field").field(INFERENCE_ID_FIELD, "my_id").startObject(MODEL_SETTINGS_FIELD).endObject().endObject() ) ) ); @@ -737,12 +726,7 @@ private MapperService mapperServiceForFieldWithModelSettings( MapperService.MergeReason.MAPPING_UPDATE ); - SemanticTextField semanticTextField = new 
SemanticTextField( - fieldName, - List.of(), - new SemanticTextField.InferenceResult(inferenceId, modelSettings, List.of()), - XContentType.JSON - ); + SemanticTextField semanticTextField = new SemanticTextField(fieldName, inferenceId, modelSettings, List.of(), XContentType.JSON); XContentBuilder builder = JsonXContent.contentBuilder().startObject(); builder.field(semanticTextField.fieldName()); builder.value(semanticTextField); @@ -797,266 +781,6 @@ public void testExistsQueryDenseVector() throws IOException { assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class)); } - public void testInsertValueMapTraversal() throws IOException { - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("test", "value").endObject(); - - Map map = toSourceMap(Strings.toString(builder)); - SemanticTextFieldMapper.insertValue("test", map, "value2"); - assertThat(getMapValue(map, "test"), equalTo("value2")); - SemanticTextFieldMapper.insertValue("something.else", map, "something_else_value"); - assertThat(getMapValue(map, "something\\.else"), equalTo("something_else_value")); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - builder.startObject("path1").startObject("path2").field("test", "value").endObject().endObject(); - builder.endObject(); - - Map map = toSourceMap(Strings.toString(builder)); - SemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); - assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); - SemanticTextFieldMapper.insertValue("path1.path2.test_me", map, "test_me_value"); - assertThat(getMapValue(map, "path1.path2.test_me"), equalTo("test_me_value")); - SemanticTextFieldMapper.insertValue("path1.non_path2.test", map, "test_value"); - assertThat(getMapValue(map, "path1.non_path2\\.test"), equalTo("test_value")); - - SemanticTextFieldMapper.insertValue("path1.path2", map, Map.of("path3", "bar")); - assertThat(getMapValue(map, "path1.path2"), equalTo(Map.of("path3", "bar"))); - - SemanticTextFieldMapper.insertValue("path1", map, "baz"); - assertThat(getMapValue(map, "path1"), equalTo("baz")); - - SemanticTextFieldMapper.insertValue("path3.path4", map, Map.of("test", "foo")); - assertThat(getMapValue(map, "path3\\.path4"), equalTo(Map.of("test", "foo"))); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - builder.startObject("path1").array("test", "value1", "value2").endObject(); - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextFieldMapper.insertValue("path1.test", map, List.of("value3", "value4", "value5")); - assertThat(getMapValue(map, "path1.test"), equalTo(List.of("value3", "value4", "value5"))); - - SemanticTextFieldMapper.insertValue("path2.test", map, List.of("value6", "value7", "value8")); - assertThat(getMapValue(map, "path2\\.test"), equalTo(List.of("value6", "value7", "value8"))); - } - } - - public void testInsertValueListTraversal() throws IOException { - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1"); - { - builder.startArray("path2"); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - } - builder.endObject(); - } - { - builder.startObject("path3"); - { - builder.startArray("path4"); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - 
SemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); - assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); - SemanticTextFieldMapper.insertValue("path1.path2.test2", map, "value3"); - assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); - assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(Map.of("test", "value2", "test2", "value3")))); - - SemanticTextFieldMapper.insertValue("path3.path4.test", map, "value4"); - assertThat(getMapValue(map, "path3.path4.test"), equalTo("value4")); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1"); - { - builder.startArray("path2"); - builder.startArray(); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); - assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); - SemanticTextFieldMapper.insertValue("path1.path2.test2", map, "value3"); - assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); - assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(List.of(Map.of("test", "value2", "test2", "value3"))))); - } - } - - public void testInsertValueFieldsWithDots() throws IOException { - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("xxx.yyy", "value1").endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextFieldMapper.insertValue("xxx.yyy", map, "value2"); - assertThat(getMapValue(map, "xxx\\.yyy"), equalTo("value2")); - - SemanticTextFieldMapper.insertValue("xxx", map, "value3"); - assertThat(getMapValue(map, "xxx"), equalTo("value3")); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1.path2"); - { - builder.startObject("path3.path4"); - builder.field("test", "value1"); - builder.endObject(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextFieldMapper.insertValue("path1.path2.path3.path4.test", map, "value2"); - assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test"), equalTo("value2")); - - SemanticTextFieldMapper.insertValue("path1.path2.path3.path4.test2", map, "value3"); - assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test2"), equalTo("value3")); - assertThat(getMapValue(map, "path1\\.path2.path3\\.path4"), equalTo(Map.of("test", "value2", "test2", "value3"))); - } - } - - public void testInsertValueAmbiguousPath() throws IOException { - // Mixed dotted object notation - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1.path2"); - { - builder.startObject("path3"); - builder.field("test1", "value1"); - builder.endObject(); - } - builder.endObject(); - } - { - builder.startObject("path1"); - { - builder.startObject("path2.path3"); - builder.field("test2", "value2"); - builder.endObject(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); - - IllegalArgumentException ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test1", map, "value3") - ); - assertThat( - ex.getMessage(), - 
equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test3", map, "value4") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - assertThat(map, equalTo(originalMap)); - } - - // traversal through lists - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1.path2"); - { - builder.startArray("path3"); - builder.startObject().field("test1", "value1").endObject(); - builder.endArray(); - } - builder.endObject(); - } - { - builder.startObject("path1"); - { - builder.startArray("path2.path3"); - builder.startObject().field("test2", "value2").endObject(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); - - IllegalArgumentException ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test1", map, "value3") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test3", map, "value4") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - assertThat(map, equalTo(originalMap)); - } - } - - public void testInsertValueCannotTraversePath() throws IOException { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1"); - { - builder.startArray("path2"); - builder.startArray(); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); - - IllegalArgumentException ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextFieldMapper.insertValue("path1.path2.test.test2", map, "value2") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.test] has value [value1] of type [String], which cannot be traversed into further") - ); - - assertThat(map, equalTo(originalMap)); - } - @Override protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { // Until a doc is indexed, the query is rewritten as match no docs diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java index 563093930c358..43eec68c28cd9 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java @@ -48,21 +48,20 @@ protected Predicate getRandomFieldsExcludeFilter() { @Override protected void assertEqualInstances(SemanticTextField expectedInstance, 
SemanticTextField newInstance) { assertThat(newInstance.fieldName(), equalTo(expectedInstance.fieldName())); - assertThat(newInstance.originalValues(), equalTo(expectedInstance.originalValues())); - assertThat(newInstance.inference().modelSettings(), equalTo(expectedInstance.inference().modelSettings())); - assertThat(newInstance.inference().chunks().size(), equalTo(expectedInstance.inference().chunks().size())); - SemanticTextField.ModelSettings modelSettings = newInstance.inference().modelSettings(); - for (int i = 0; i < newInstance.inference().chunks().size(); i++) { - assertThat(newInstance.inference().chunks().get(i).text(), equalTo(expectedInstance.inference().chunks().get(i).text())); + assertThat(newInstance.modelSettings(), equalTo(expectedInstance.modelSettings())); + assertThat(newInstance.chunks().size(), equalTo(expectedInstance.chunks().size())); + SemanticTextField.ModelSettings modelSettings = newInstance.modelSettings(); + for (int i = 0; i < newInstance.chunks().size(); i++) { + assertThat(newInstance.chunks().get(i).offset(), equalTo(expectedInstance.chunks().get(i).offset())); switch (modelSettings.taskType()) { case TEXT_EMBEDDING -> { double[] expectedVector = parseDenseVector( - expectedInstance.inference().chunks().get(i).rawEmbeddings(), + expectedInstance.chunks().get(i).rawEmbeddings(), modelSettings.dimensions(), expectedInstance.contentType() ); double[] newVector = parseDenseVector( - newInstance.inference().chunks().get(i).rawEmbeddings(), + newInstance.chunks().get(i).rawEmbeddings(), modelSettings.dimensions(), newInstance.contentType() ); @@ -70,11 +69,11 @@ protected void assertEqualInstances(SemanticTextField expectedInstance, Semantic } case SPARSE_EMBEDDING -> { List<WeightedToken> expectedTokens = parseWeightedTokens( - expectedInstance.inference().chunks().get(i).rawEmbeddings(), + expectedInstance.chunks().get(i).rawEmbeddings(), expectedInstance.contentType() ); List<WeightedToken> newTokens = parseWeightedTokens( - newInstance.inference().chunks().get(i).rawEmbeddings(), + newInstance.chunks().get(i).rawEmbeddings(), newInstance.contentType() ); assertThat(newTokens, equalTo(expectedTokens)); @@ -192,24 +191,27 @@ public static SemanticTextField randomSemanticText(String fieldName, Model model case SPARSE_EMBEDDING -> randomSparseEmbeddings(inputs); default -> throw new AssertionError("invalid task type: " + model.getTaskType().name()); }; - return semanticTextFieldFromChunkedInferenceResults(fieldName, model, inputs, results, contentType); + return semanticTextFieldFromChunkedInferenceResults( + fieldName, + model, + SemanticTextField.nodeStringValues(fieldName, inputs), + results, + contentType + ); } public static SemanticTextField semanticTextFieldFromChunkedInferenceResults( String fieldName, Model model, - List<String> inputs, + String input, ChunkedInferenceServiceResults results, XContentType contentType ) { return new SemanticTextField( fieldName, - inputs, - new SemanticTextField.InferenceResult( - model.getInferenceEntityId(), - new SemanticTextField.ModelSettings(model), - toSemanticTextFieldChunks(List.of(results), contentType) - ), + model.getInferenceEntityId(), + new SemanticTextField.ModelSettings(model), + toSemanticTextFieldChunks(fieldName, input, List.of(results), contentType), contentType ); } @@ -233,33 +235,31 @@ public static Object randomSemanticTextInput() { } public static ChunkedInferenceServiceResults toChunkedResult(SemanticTextField field) throws IOException { - switch (field.inference().modelSettings().taskType()) { + switch
(field.modelSettings().taskType()) { case SPARSE_EMBEDDING -> { List<MlChunkedTextExpansionResults.ChunkedResult> chunks = new ArrayList<>(); - for (var chunk : field.inference().chunks()) { + for (var chunk : field.chunks()) { var tokens = parseWeightedTokens(chunk.rawEmbeddings(), field.contentType()); - chunks.add(new MlChunkedTextExpansionResults.ChunkedResult(chunk.text(), tokens)); + // TODO + chunks.add(new MlChunkedTextExpansionResults.ChunkedResult(null, tokens)); } return new InferenceChunkedSparseEmbeddingResults(chunks); } case TEXT_EMBEDDING -> { List<InferenceChunkedTextEmbeddingFloatResults.InferenceFloatEmbeddingChunk> chunks = new ArrayList<>(); - for (var chunk : field.inference().chunks()) { - double[] values = parseDenseVector( - chunk.rawEmbeddings(), - field.inference().modelSettings().dimensions(), - field.contentType() - ); + for (var chunk : field.chunks()) { + double[] values = parseDenseVector(chunk.rawEmbeddings(), field.modelSettings().dimensions(), field.contentType()); + // TODO chunks.add( new InferenceChunkedTextEmbeddingFloatResults.InferenceFloatEmbeddingChunk( - chunk.text(), + null, FloatConversionUtils.floatArrayOf(values) ) ); } return new InferenceChunkedTextEmbeddingFloatResults(chunks); } - default -> throw new AssertionError("Invalid task_type: " + field.inference().modelSettings().taskType().name()); + default -> throw new AssertionError("Invalid task_type: " + field.modelSettings().taskType().name()); } }
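Note the two `// TODO` placeholders above: a chunk no longer carries a copy of its text, only a character range into the parent field, so rebuilding a ChunkedResult needs the original field value. A hedged sketch of that recovery, assuming the chunk record is SemanticTextField.Chunk with an offset() accessor whose start/end follow the offset objects in the moby-dick.json fixture (the helper itself is illustrative, not part of this patch):

    // Illustrative only: recover a chunk's text by slicing the source value
    // with the chunk's character offsets, instead of reading a stored copy.
    static String chunkText(String fieldValue, SemanticTextField.Chunk chunk) {
        return fieldValue.substring(chunk.offset().start(), chunk.offset().end());
    }

diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index b8bcb766b53e1..67494ed10e471 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -27,9 +27,11 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceToParse; +import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.query.MatchNoneQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; @@ -41,7 +43,6 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.AbstractQueryTestCase; -import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; @@ -67,7 +68,6 @@ import static org.apache.lucene.search.BooleanClause.Occur.FILTER; import static org.apache.lucene.search.BooleanClause.Occur.MUST; import static org.apache.lucene.search.BooleanClause.Occur.SHOULD; -import static org.elasticsearch.index.IndexVersions.NEW_SPARSE_VECTOR; import static org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig.DEFAULT_RESULTS_FIELD; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -114,17 +114,12 @@ public void setUp() throws Exception { @Override protected Collection<Class<? extends Plugin>> getPlugins() { - return List.of(InferencePlugin.class, FakeMlPlugin.class); +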
return List.of(InferencePlugin.class, MapperExtrasPlugin.class, FakeMlPlugin.class); } @Override protected Settings createTestIndexSettings() { - // Randomize index version within compatible range - // we have to prefer CURRENT since with the range of versions we support it's rather unlikely to get the current actually. - IndexVersion indexVersionCreated = randomBoolean() - ? IndexVersion.current() - : IndexVersionUtils.randomVersionBetween(random(), NEW_SPARSE_VECTOR, IndexVersion.current()); - return Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, indexVersionCreated).build(); + return Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()).build(); } @Override @@ -193,9 +189,11 @@ protected void doAssertLuceneQuery(SemanticQueryBuilder queryBuilder, Query quer } private void assertSparseEmbeddingLuceneQuery(Query query) { - Query innerQuery = assertOuterBooleanQuery(query); + assertThat(query, instanceOf(SparseVectorQuery.class)); + Query termsQuery = ((SparseVectorQuery) query).getTermsQuery(); + Query innerQuery = assertOuterBooleanQuery(termsQuery); assertThat(innerQuery, instanceOf(BooleanQuery.class)); BooleanQuery innerBooleanQuery = (BooleanQuery) innerQuery; assertThat(innerBooleanQuery.clauses().size(), equalTo(queryTokenCount)); innerBooleanQuery.forEach(c -> { @@ -347,15 +344,16 @@ private static SourceToParse buildSemanticTextFieldWithInferenceResults( if (modelSettings != null) { SemanticTextField semanticTextField = new SemanticTextField( SEMANTIC_TEXT_FIELD, + INFERENCE_ID, + modelSettings, List.of(), - new SemanticTextField.InferenceResult(INFERENCE_ID, modelSettings, List.of()), XContentType.JSON ); XContentBuilder builder = JsonXContent.contentBuilder().startObject(); - builder.field(semanticTextField.fieldName()); - builder.value(semanticTextField); - builder.endObject(); + builder.startObject(InferenceMetadataFieldsMapper.NAME); + builder.field(semanticTextField.fieldName(), semanticTextField); + builder.endObject().endObject(); sourceToParse = new SourceToParse("test", BytesReference.bytes(builder), XContentType.JSON); }
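For reference, the source document this helper now builds nests the serialized field under the inference metadata section rather than under the field itself. A sketch of the resulting JSON, with key names and values borrowed from the moby-dick.json fixture added at the end of this patch (the embeddings array is elided):

    {
      "_inference_fields": {
        "dense_field": {
          "inference_id": ".multilingual-e5-small-elasticsearch",
          "model_settings": {
            "task_type": "text_embedding",
            "dimensions": 384,
            "similarity": "cosine",
            "element_type": "float"
          },
          "chunks": [
            { "offset": { "field": "field", "start": 0, "end": 1329 }, "embeddings": [ ... ] }
          ]
        }
      }
    }

diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SparseVectorQueryBuilderTests.java similarity index 97% rename from x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java rename to x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SparseVectorQueryBuilderTests.java index 13cf6d87728a8..a2d3eb9fc1198 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SparseVectorQueryBuilderTests.java @@ -5,7 +5,7 @@ * 2.0.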
*/ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.inference.queries; import org.apache.lucene.document.Document; import org.apache.lucene.document.FeatureField; @@ -40,9 +40,8 @@ import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.inference.InferencePlugin; import java.io.IOException; import java.lang.reflect.Method; @@ -50,7 +49,7 @@ import java.util.Collection; import java.util.List; -import static org.elasticsearch.xpack.ml.queries.SparseVectorQueryBuilder.QUERY_VECTOR_FIELD; +import static org.elasticsearch.xpack.inference.queries.SparseVectorQueryBuilder.QUERY_VECTOR_FIELD; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.hasSize; @@ -102,7 +101,7 @@ private SparseVectorQueryBuilder createTestQueryBuilder(TokenPruningConfig token @Override protected Collection<Class<? extends Plugin>> getPlugins() { - return List.of(MachineLearning.class, MapperExtrasPlugin.class, XPackClientPlugin.class); + return List.of(InferencePlugin.class, MapperExtrasPlugin.class, XPackClientPlugin.class); } @Override @@ -237,7 +236,7 @@ private void testDoToQuery(SparseVectorQueryBuilder queryBuilder, SearchExecutio // It's possible that all documents were pruned for aggressive pruning configurations assertTrue(query instanceof BooleanQuery || query instanceof MatchNoDocsQuery); } else { - assertTrue(query instanceof BooleanQuery); + assertTrue(query instanceof SparseVectorQuery); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/TextExpansionQueryBuilderTests.java similarity index 97% rename from x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java rename to x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/TextExpansionQueryBuilderTests.java index 00d50e0d0d7bb..090a4ec8556d2 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/TextExpansionQueryBuilderTests.java @@ -5,7 +5,7 @@ * 2.0.
*/ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.inference.queries; import org.apache.lucene.document.Document; import org.apache.lucene.document.FeatureField; @@ -35,10 +35,8 @@ import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; -import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.inference.InferencePlugin; import java.io.IOException; import java.lang.reflect.Method; @@ -77,7 +75,7 @@ protected TextExpansionQueryBuilder doCreateTestQueryBuilder() { @Override protected Collection<Class<? extends Plugin>> getPlugins() { - return List.of(MachineLearning.class, MapperExtrasPlugin.class, XPackClientPlugin.class); + return List.of(InferencePlugin.class, MapperExtrasPlugin.class, XPackClientPlugin.class); } @Override diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TokenPruningConfigTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/TokenPruningConfigTests.java similarity index 96% rename from x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TokenPruningConfigTests.java rename to x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/TokenPruningConfigTests.java index 8cdf44ae51dd4..a5e569950c319 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TokenPruningConfigTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/TokenPruningConfigTests.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.core.ml.search; +package org.elasticsearch.xpack.inference.queries; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.test.AbstractXContentSerializingTestCase;
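The relocated WeightedTokensQueryBuilderTests below switch their Lucene-level assertions to the new SparseVectorQuery wrapper introduced by this patch: the wrapper is unwrapped through getTermsQuery() before the usual boolean-clause checks. The pattern, condensed from the hunks that follow (variable names as in the tests):

    // Unwrap the SparseVectorQuery wrapper, then inspect the underlying
    // boolean query of FeatureField term clauses as before.
    assertThat(query, instanceOf(SparseVectorQuery.class));
    Query termsQuery = ((SparseVectorQuery) query).getTermsQuery();
    assertThat(termsQuery, instanceOf(BooleanQuery.class));
    BooleanQuery booleanQuery = (BooleanQuery) termsQuery;
    assertEquals(1, booleanQuery.getMinimumNumberShouldMatch());

diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/WeightedTokensQueryBuilderTests.java similarity index 95% rename from x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java rename to x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/WeightedTokensQueryBuilderTests.java index 114ad90354c61..6833dd37a445d 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/WeightedTokensQueryBuilderTests.java @@ -5,7 +5,7 @@ * 2.0.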
*/ -package org.elasticsearch.xpack.core.ml.search; +package org.elasticsearch.xpack.inference.queries; import org.apache.lucene.document.Document; import org.apache.lucene.document.FeatureField; @@ -35,13 +35,15 @@ import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; +import org.elasticsearch.xpack.core.ml.search.WeightedToken; +import org.elasticsearch.xpack.inference.InferencePlugin; import java.io.IOException; import java.lang.reflect.Method; import java.util.Collection; import java.util.List; -import static org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder.TOKENS_FIELD; +import static org.elasticsearch.xpack.inference.queries.WeightedTokensQueryBuilder.TOKENS_FIELD; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.Matchers.either; @@ -75,7 +77,7 @@ private WeightedTokensQueryBuilder createTestQueryBuilder(boolean onlyScorePrune @Override protected Collection> getPlugins() { - return List.of(XPackClientPlugin.class, MapperExtrasPlugin.class); + return List.of(InferencePlugin.class, MapperExtrasPlugin.class, XPackClientPlugin.class); } @Override @@ -271,8 +273,10 @@ public void testPruningIsAppliedCorrectly() throws IOException { } private void assertCorrectLuceneQuery(String name, Query query, List expectedFeatureFields) { - assertTrue(query instanceof BooleanQuery); - List booleanClauses = ((BooleanQuery) query).clauses(); + assertTrue(query instanceof SparseVectorQuery); + Query termsQuery = ((SparseVectorQuery) query).getTermsQuery(); + assertTrue(termsQuery instanceof BooleanQuery); + List booleanClauses = ((BooleanQuery) termsQuery).clauses(); assertEquals( name + " had " + booleanClauses.size() + " clauses, expected " + expectedFeatureFields.size(), expectedFeatureFields.size(), @@ -343,8 +347,10 @@ public void testMustRewrite() throws IOException { @Override protected void doAssertLuceneQuery(WeightedTokensQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { - assertThat(query, instanceOf(BooleanQuery.class)); - BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(query, instanceOf(SparseVectorQuery.class)); + Query termsQuery = ((SparseVectorQuery) query).getTermsQuery(); + assertThat(termsQuery, instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) termsQuery; assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1); assertThat(booleanQuery.clauses(), hasSize(NUM_TOKENS)); diff --git a/x-pack/plugin/inference/src/test/resources/org/elasticsearch/xpack/inference/highlight/moby-dick.json b/x-pack/plugin/inference/src/test/resources/org/elasticsearch/xpack/inference/highlight/moby-dick.json new file mode 100644 index 0000000000000..0441873531dca --- /dev/null +++ b/x-pack/plugin/inference/src/test/resources/org/elasticsearch/xpack/inference/highlight/moby-dick.json @@ -0,0 +1,4339 @@ +{ + "_inference_fields": { + "dense_field": { + "inference_id": ".multilingual-e5-small-elasticsearch", + "model_settings": { + "task_type": "text_embedding", + "dimensions": 384, + "similarity": "cosine", + "element_type": "float" + }, + "chunks": [ + { + "offset": { + "field": "field", + "start": 0, + "end": 1329 + }, + "embeddings": [ + 0.04979738, + -0.049024884, + -0.06267286, + -0.06284121, + 0.031987894, + -0.08689449, + 0.044664543, + 0.076699525, + 0.06471937, + 0.028753767, + 
0.02369647, + 0.011940286, + 0.041063324, + -0.0031249018, + -0.012605156, + 0.020917466, + 0.0698649, + -0.07892161, + -0.010968826, + -0.060116883, + 0.012380837, + -0.022275316, + -0.02007232, + 0.053651124, + 0.045564346, + 0.06287834, + -0.026206115, + 0.034378637, + 0.028786598, + -0.07342769, + -0.05178595, + -0.03394133, + 0.06494073, + -0.07682645, + 0.039480515, + 8.8730786E-4, + -0.035883103, + -0.02245836, + 0.051104713, + -0.02161596, + -0.0014411546, + 0.011840296, + 0.044061452, + 0.018550612, + 0.07816852, + 0.023765374, + -0.04192663, + 0.056223065, + -0.029935915, + -0.039640833, + -0.061091922, + 0.048074532, + 0.03252561, + 0.07752945, + 0.0374488, + -0.0938137, + -0.06071223, + -0.053990547, + -0.06630911, + 0.040193927, + 0.038531914, + -0.023115646, + -0.0046846615, + 0.025255106, + 0.074686274, + 0.10130572, + 0.06328507, + 0.017575556, + -0.040289026, + -0.013285351, + -0.06927493, + 0.08576633, + -0.003492294, + -0.041360997, + 0.036476493, + 0.04270745, + 0.060671005, + -0.0651591, + 0.014901469, + -0.04655241, + -0.006525806, + -0.037813406, + -0.02792913, + 0.0472146, + -0.07142533, + 0.06478618, + 0.0716035, + -0.04885643, + 0.073330306, + -0.055672232, + 0.057761118, + 0.07276108, + -0.076485656, + -0.06970012, + -0.0692586, + -0.051378023, + -0.06273683, + 0.03469511, + 0.05773398, + -0.08031594, + 0.10501066, + 7.310874E-4, + 0.050745558, + -0.016756695, + -0.031716295, + 0.0050844094, + 0.031707063, + -0.039744828, + 0.05837439, + -0.09262242, + -0.04054004, + -0.0075583286, + 0.061934657, + 0.035783943, + -0.055616625, + -0.047291458, + -0.027218537, + -0.011617415, + 0.026992036, + -0.03259098, + 0.08588563, + -0.015476044, + -0.04406553, + -0.058256716, + -0.049162734, + -0.018606737, + 0.02703335, + 0.023426747, + 0.028659008, + 0.014869456, + 0.04368826, + 0.03709602, + 0.0059531354, + 0.012405994, + 0.023295961, + 0.09050855, + -0.025719937, + -0.038713705, + 0.02654418, + -0.07065918, + -0.04294843, + 0.050370634, + -0.0033409365, + 0.052235987, + 0.07693816, + 0.043221552, + 0.07534102, + -0.048658077, + 0.06533618, + -0.016787754, + 0.034524675, + -0.0312765, + 0.05486932, + 0.06286382, + 0.03278902, + -0.06772777, + -0.087687664, + -0.0665437, + 0.032016467, + 0.066101246, + -0.11844821, + -0.032777846, + -0.053238686, + -0.015841002, + -0.067591116, + -0.048692815, + -0.013267198, + 0.09390532, + -0.029956369, + -0.021315884, + -0.03857401, + 0.03929155, + -0.023058, + 0.051734913, + -0.023478175, + 0.035602726, + -0.08242782, + 0.058339056, + 0.045796614, + 0.05448777, + -0.047254823, + 0.020266606, + -0.08056925, + 0.0015524789, + -0.041604258, + 0.00718068, + -0.044556983, + 0.02106678, + 0.04749506, + -0.01840031, + 0.023407241, + 0.070747316, + -0.04295862, + -0.07703961, + -0.0660327, + 0.013576343, + -0.023668775, + 0.056404322, + 0.09587012, + 0.05701044, + -0.036240827, + -0.004225128, + 0.0067939283, + 0.035346694, + 0.026707595, + 0.017638108, + -0.032440145, + 0.04708931, + 0.012399647, + 0.07325736, + 0.027942428, + -0.08172854, + -0.07065871, + 0.033890083, + -0.033598673, + -0.08178952, + 0.028348992, + 0.04411821, + -0.044644725, + 0.03074351, + 0.0935692, + -0.04762361, + 0.051226508, + -0.08009367, + -0.03847554, + 0.016323369, + 0.038776945, + -0.059975337, + -0.057062503, + 0.010849399, + -0.030187564, + -0.026308322, + -0.067967005, + -0.079719126, + -0.08646553, + -0.09048591, + -0.018597756, + 0.0047154897, + 0.058588482, + -0.09175631, + -0.08307076, + -0.035472285, + 0.009541795, + -0.026162423, + 0.03335252, + 
0.018381111, + -0.015803808, + 0.021074254, + -0.010327698, + 0.025227644, + 0.06197503, + -0.059137702, + -0.018673804, + 0.00707259, + -0.019355131, + 0.026796991, + 0.025893785, + 0.0685412, + -0.06301929, + 0.003187423, + 0.029026637, + -0.019066911, + 0.09354283, + 0.1061943, + 0.053696748, + -0.0016658951, + -0.0030081598, + -0.028984388, + -0.037768397, + -0.035445668, + -0.026019065, + 0.028805656, + 0.021448314, + -0.059069104, + -0.06226507, + -0.05508101, + 0.022365203, + 0.09221683, + -0.07698258, + -0.055819187, + 0.061300304, + 0.05965072, + 0.029480126, + 0.057750076, + 0.05180143, + -0.0159732, + -0.0823228, + 0.09240897, + -0.08318623, + 0.002020457, + 0.010953976, + -0.09685372, + 0.05271347, + -0.04232834, + 0.061398283, + 0.044973806, + -0.02088832, + 0.044399235, + -0.014687839, + 0.06304118, + -0.022936989, + -0.033005796, + 0.074231274, + 0.023939423, + -0.087914266, + 0.036014125, + 0.0062753465, + -0.03355067, + 0.036039222, + 0.012712498, + 0.057161637, + 0.05654562, + -0.018600527, + -0.035825036, + 0.06950757, + 0.05828935, + 3.8511172E-4, + -0.008722925, + -0.0522819, + -0.10943554, + -0.033790745, + -0.03357093, + -0.031342223, + -0.07834354, + 0.032603115, + 0.026984481, + -0.02969966, + -0.048259087, + -0.012163297, + 0.007793295, + 0.05574152, + -0.022157356, + -0.03623348, + 0.037055306, + -0.033247784, + -0.0070533184, + -0.057643052, + 0.08567554, + -0.07278431, + -0.06556353, + 0.0308075, + 0.052940007, + -0.0566871, + 0.0287218, + -0.06409354, + -0.0627855, + 0.06254832, + -0.027221028, + -0.049813032, + 0.03935744, + 0.07234624, + -0.09398941, + 0.011342199, + 0.028675176, + -0.022932779, + 0.009481765, + -0.022316003, + -0.015413267, + 0.039174553, + 0.061736017, + -0.04229645, + -0.052905895, + 0.018588098, + 0.070939854, + 0.0748456, + 0.08648295, + -0.036223643, + 0.008473833, + 0.053857446, + -0.07680301, + 0.0785199, + 0.03982, + -0.039509695, + 0.03373825, + -0.063460656, + -0.038993217, + -0.073084034, + 0.062789686, + -0.081148736, + -0.035036374, + 0.0754924, + 0.087299235, + 0.04096056, + 0.027776068 + ] + }, + { + "offset": { + "field": "field", + "start": 1281, + "end": 2685 + }, + "embeddings": [ + 0.035266396, + -0.044093177, + -0.04158629, + -0.045926083, + 0.06521479, + -0.050932676, + 0.03961649, + 0.037828345, + 0.025232289, + 0.029732272, + 0.034696255, + -8.805868E-4, + 0.053202488, + -0.0047244085, + -0.037418325, + 0.0770543, + 0.105328426, + -0.036611717, + -0.039531372, + -0.082817726, + 0.021342339, + -0.01843601, + -0.042259317, + 0.06317797, + 0.036926534, + 0.069380246, + -0.059219223, + 0.043066744, + -0.006286799, + -0.06797077, + -0.042236328, + -0.036919896, + 0.034179892, + -0.026980922, + 0.051384695, + 0.03826208, + -0.012975077, + -0.025295, + 0.015923942, + -0.027602347, + -0.022515642, + -5.98229E-4, + 0.06122002, + 0.050380763, + 0.04684541, + 0.08975921, + -0.03755087, + 0.046912387, + -0.038697798, + -0.06988436, + -0.05219296, + 0.041337684, + 0.023435602, + 0.023100449, + 0.0352068, + -0.060556572, + -0.042356305, + -0.04503575, + -0.07377149, + 0.084542595, + 0.028644886, + -0.024366854, + -0.009185593, + 0.01255741, + 0.06999743, + 0.09439326, + 0.03800093, + -0.008208419, + -0.09673358, + 0.0023752274, + -0.07626475, + 0.098563485, + -0.012569254, + -0.08954541, + -0.010908005, + 0.016228944, + 0.05984263, + -0.051004995, + 0.024147974, + -0.050623365, + -0.01668758, + -0.007899899, + -0.029833568, + 0.034388572, + -0.03505155, + 0.08271141, + 0.08569518, + -0.053716324, + 0.06806682, + -0.067159526, + 
0.043537326, + 0.09806787, + -0.041304354, + -0.05103136, + -0.109280586, + -0.06120091, + -0.09363793, + 0.032154918, + 0.12145496, + -0.049101993, + 0.07359592, + -0.010511772, + 0.074003994, + -0.013990566, + -0.026140982, + 0.052602872, + 0.09067435, + -0.070553906, + 0.057253607, + -0.048433788, + -0.024026526, + 0.018851176, + 0.04090621, + 0.058670815, + -0.08735305, + -0.022817774, + -0.042838365, + -0.016657954, + 0.03224679, + -0.01952135, + 0.016957905, + -2.0869492E-4, + -0.0039428347, + -0.05186959, + -0.062616155, + -0.056938402, + 0.00882266, + 0.055156156, + 0.03221514, + 0.026071686, + 0.073993444, + 0.060973227, + 0.040219847, + 0.030080495, + 0.074190594, + 0.10667069, + -0.035753082, + -0.031658202, + 0.024792355, + -0.056956623, + -0.04320206, + 0.042175233, + -0.04459597, + 0.063075, + 0.03682348, + 0.087945856, + 0.060606126, + -0.02543529, + 0.101843245, + -0.02052844, + 0.065993346, + -0.01580399, + 0.01996002, + 0.025750767, + 0.044288505, + -0.055157375, + -0.0834102, + -0.07820265, + 0.01860491, + 0.052071907, + -0.082538106, + -0.06682723, + -0.031070147, + -5.8769673E-4, + -0.05546835, + -0.041754596, + 0.007750717, + 0.06550786, + -0.024858464, + -0.018027157, + -0.070528544, + 0.04311053, + -0.04646167, + 0.038272627, + -0.023141516, + 0.035724208, + -0.044601943, + 0.031177005, + 0.060686704, + -0.008791896, + -0.045239996, + -0.0015549486, + -0.023560282, + -0.02124949, + -0.028758224, + -0.01994061, + -0.031099308, + 0.033113, + 0.04315839, + -0.014818203, + -0.016493127, + 0.03928858, + -0.049371842, + -0.057269108, + -0.07144285, + 0.045010682, + -0.02822895, + 0.026698994, + 0.08181065, + 0.0497983, + -0.0033907534, + -0.023786934, + 0.013289109, + 0.011108559, + 0.075379916, + 0.012320797, + -0.045297462, + 0.09245994, + -0.027429234, + 0.058199212, + 0.06857553, + -0.0705278, + -0.055046707, + 0.025127407, + -0.044880733, + -0.07819047, + -0.016903652, + 0.031777192, + -0.027202426, + 0.033661053, + 0.082595035, + -0.010536667, + 0.067396104, + -0.048291907, + -0.038250096, + 0.009253138, + 0.040732533, + -0.06330689, + -0.074753396, + 0.04644269, + -0.029993957, + -0.033248927, + -0.053877644, + -0.098819815, + -0.0260293, + -0.030682972, + -0.034318104, + -0.014064486, + -0.020334287, + -0.12791014, + -0.017047742, + -0.052973263, + 0.017977173, + -0.04006773, + 0.066867575, + -0.07052264, + -0.02385362, + 0.028173303, + -0.07004571, + 0.053027462, + 0.039910827, + -0.026693301, + -0.07183149, + -0.073637374, + 0.008942395, + 0.012631494, + 0.040236488, + 0.07312884, + -0.1052349, + 0.013788912, + 0.05933606, + -0.012417836, + 0.07844875, + 0.035665687, + 0.0692123, + 0.011978119, + 0.0032255524, + -0.02082568, + -0.027911682, + -0.008114962, + -0.100171834, + 0.012006536, + 0.027355125, + -0.069779284, + -0.06982269, + -0.02499225, + 0.06460924, + 0.10172508, + -0.036987256, + -0.027838582, + 0.06524349, + 0.03478602, + 0.047589943, + 0.0034753575, + 0.035028856, + 0.03955437, + -0.056392808, + 0.097454645, + -0.067250304, + -0.016183723, + -0.010761581, + -0.046665948, + 0.052830804, + -0.06562526, + 0.0143448245, + 0.035826858, + -0.030075911, + 0.074224986, + -0.01484229, + 0.047223467, + -0.05010028, + -0.08323114, + 0.024850823, + 0.0035780836, + -0.04660368, + 0.012318496, + 0.035511326, + -0.006625753, + 0.023968346, + 0.04152267, + 0.066447295, + 0.031807587, + -0.026121954, + -0.06298641, + 0.09144068, + 0.07982457, + -0.047639504, + -0.011746696, + -0.03417992, + -0.066457696, + -0.015668094, + -0.036196046, + -0.0029406173, + 
-0.054462895, + 0.0029062356, + 0.019851439, + 0.0064928187, + -0.06603669, + 0.016133538, + 0.0441623, + -0.013663719, + -0.027901169, + -0.05862742, + 0.035473794, + -0.080742985, + -0.012147599, + -0.06269955, + 0.045475967, + -0.07024215, + -0.09113673, + 0.018147662, + 0.037072584, + -0.011495025, + 0.049087547, + 0.00970628, + -0.043941073, + 0.052213665, + -0.027107846, + -0.05408287, + 0.04391075, + 0.05903725, + -0.11579457, + 0.0179941, + 0.023727184, + -0.027765218, + 0.058974497, + -0.041185096, + -0.06411593, + 0.05297974, + 0.014402285, + -0.07491701, + -0.046273973, + 0.025595015, + 0.072552234, + 0.07913544, + 0.05780724, + 0.010108354, + -0.032680638, + 0.07236567, + -0.059348762, + 0.07916222, + 0.06330368, + -0.040674247, + 0.014580703, + -0.056963094, + -0.05973973, + -0.028593862, + 0.054875106, + -0.083951905, + -0.030538274, + 0.04507664, + 0.057579767, + 0.047284584, + 0.029037142 + ] + }, + { + "offset": { + "field": "field", + "start": 2660, + "end": 3932 + }, + "embeddings": [ + 0.060263444, + -0.011627793, + -0.07406454, + -0.061137985, + 0.035276245, + -0.06492958, + 0.036304567, + 0.03849267, + 0.032589767, + 0.034697585, + 0.055276874, + 0.0067610983, + 0.07107068, + -0.028453767, + -0.023335157, + 0.066190325, + 0.09514554, + -0.031573914, + -0.036566608, + -0.03254594, + 0.01258663, + -0.008238347, + -0.024652604, + 0.058704935, + 0.029146092, + 0.0538354, + -0.033388253, + 0.035337757, + 0.048961233, + -0.06575967, + -0.060514227, + -0.054762013, + 0.049676932, + -0.062150035, + -0.019077798, + 0.018297857, + -0.043477535, + -0.06992983, + 0.041489013, + -0.06091549, + 0.00857616, + 0.0013787356, + 0.059843466, + 0.065656655, + 0.07694915, + 0.07400389, + -0.008740612, + 0.02598118, + -0.04293424, + -0.029819168, + -0.057130232, + 0.08674767, + 0.0020843677, + 0.094413035, + 0.026790254, + -0.07488432, + -0.06260386, + -0.059874497, + -0.022945922, + 0.07328087, + 0.0012629362, + -0.014891515, + -0.017552191, + 0.04158861, + 0.074740976, + 0.13079657, + 0.03465537, + 0.033060353, + -0.071494736, + -0.042101286, + -0.09333479, + 0.075504355, + -0.048976846, + -0.07538883, + 0.016815975, + 0.014265034, + 0.04265424, + -0.055298902, + 0.021028202, + -0.043243185, + -0.035213232, + -0.03872826, + -0.03735794, + -0.009753857, + -0.06591585, + 0.06382551, + 0.070999734, + -0.07432682, + 0.051665448, + -0.06200163, + 0.035289973, + 0.052576542, + -0.08547946, + -0.051438782, + -0.06883237, + -0.04034897, + -0.1139505, + 0.029103009, + 0.056813173, + -0.033878006, + 0.065993756, + 0.0012909115, + 0.030890198, + -0.026131464, + -0.042535, + 0.044831734, + 0.075214975, + -0.045039084, + 0.056481812, + -0.052748743, + -0.042459268, + 0.016207209, + 0.032704834, + 0.04342557, + -0.031859122, + -0.037544478, + -0.023973966, + -0.056660555, + 0.03458018, + -0.043174002, + 0.07610799, + -0.040468093, + -0.041871496, + -0.04984353, + -0.040546015, + -0.019524354, + 0.04170828, + 0.020450952, + 0.0404415, + 0.03985574, + 0.032101743, + 0.05156037, + 0.048545454, + 0.03334057, + 0.025009904, + 0.090053804, + -0.030840183, + 0.0017696177, + 0.01567415, + -0.04152217, + -0.031758398, + 0.020865917, + -0.05755524, + 0.04980784, + 0.050742626, + 0.07122176, + 0.06281647, + -0.012783542, + 0.08377948, + -0.029796185, + 0.017047247, + 0.011766123, + 0.03557249, + 0.019037597, + 0.028088165, + -0.07208148, + -0.08005564, + -0.057871744, + 0.0153855365, + 0.054635677, + -0.05614729, + -0.031374976, + -0.06079491, + -0.041638877, + -0.055767294, + -0.048497472, + -0.007389678, + 
0.012500725, + 0.02392964, + -0.03444656, + -0.032773327, + 0.050030876, + -0.062147807, + 0.03894452, + 0.005381243, + 0.005100098, + -0.082184665, + 0.01259893, + 0.06914528, + 0.0502573, + -0.014370648, + -0.039859537, + -0.06393138, + -0.061919075, + -0.014192415, + -0.032273103, + -0.0464307, + -7.1235467E-4, + 0.051684704, + -0.006423554, + 0.0010265269, + 0.057130195, + -0.044715635, + -0.08753112, + -0.060454912, + 0.04602993, + -0.009173136, + 0.030031096, + 0.05415974, + 0.040149722, + -0.030073693, + -0.0026639393, + 0.06262825, + 0.0073858122, + 0.07543514, + 0.013202129, + -0.055555925, + 0.076006316, + 0.0069068773, + 0.037352845, + 0.05844025, + -0.087049164, + -0.0934209, + 0.021478496, + -0.06904104, + -0.035960656, + 0.012564326, + 0.08203622, + -0.0589588, + 0.038763568, + 0.059626605, + -0.0015563822, + 0.056733213, + -0.06597729, + -0.0487247, + 0.030533105, + 0.059536766, + -0.043689486, + -0.044405177, + 0.039805703, + -0.033027582, + -0.034072082, + -0.080049135, + -0.08942587, + 0.019459073, + -0.044563998, + -0.06931994, + 0.021550108, + 0.022951653, + -0.051044974, + -0.03676219, + -0.050016202, + 0.03538716, + -0.06436871, + 0.09116231, + -0.03250418, + -0.008333591, + 0.02689493, + -0.023252478, + 0.04825159, + 0.07439804, + -0.08796822, + -0.04385184, + -0.05042988, + -0.056784004, + 0.057135444, + 0.055787697, + 0.056427166, + -0.09837734, + -0.0036608325, + 0.013839507, + -0.020212527, + 0.09865649, + 0.080563836, + 0.07525103, + 0.033415828, + -0.02267602, + -0.067864396, + -0.05965757, + -0.010466497, + -0.047837727, + 0.017926434, + 0.032667693, + -0.069811225, + -0.011690649, + -0.044193, + 0.023269301, + 0.07142345, + -0.0031622057, + -0.0047916253, + 0.07077121, + 0.03767678, + 0.03410683, + 0.036370695, + 0.01696176, + -0.026317174, + -0.008320507, + 0.09212631, + -0.07694487, + -0.034243643, + 0.0110022295, + -0.060418822, + 0.07019466, + -0.051362276, + 0.078166254, + 0.055226926, + -0.04018289, + 0.063233584, + -0.032110535, + 0.08297619, + -0.009597479, + -0.057851054, + 0.042411964, + 0.01997834, + -0.07460758, + 0.061238132, + 0.0050869896, + 0.023704918, + 0.03991232, + 0.07121017, + 0.067201145, + 0.04065065, + -0.05990329, + -0.04676335, + 0.08255157, + 0.039478876, + -0.05370604, + -0.015417656, + -0.061638564, + -0.113423236, + -0.020872636, + -0.06506326, + -0.019086778, + -0.07550901, + 0.023448454, + 0.031439524, + -0.018936215, + -0.061786037, + 0.06917624, + -0.016625067, + 0.04495578, + -0.05168137, + -0.06433023, + 0.019382514, + -0.030735377, + 0.010870069, + -0.05917494, + 0.033261493, + -0.04571641, + -0.078268915, + 0.03133073, + 0.04491661, + -0.036725685, + 0.05521663, + -0.02092035, + -0.04205282, + 0.035851613, + -0.0015220186, + -0.02102678, + 0.054027468, + 0.07405003, + -0.09111273, + 0.005834604, + 0.053133536, + -0.018385805, + 0.024131889, + -0.04136735, + -0.060419146, + 0.006526669, + 0.046679422, + -0.07396608, + -0.031180743, + 0.032524955, + 0.05950253, + 0.08502798, + 0.05705178, + 0.041140076, + 0.015673824, + 0.052156717, + 0.008876251, + 0.05783481, + 0.06875354, + -0.01914275, + 0.019451428, + 0.0017306518, + -0.09160311, + -0.06650555, + 0.06903168, + -0.11052152, + -0.08185994, + 0.0152816, + 0.056960557, + 0.06667231, + 0.042444445 + ] + }, + { + "offset": { + "field": "field", + "start": 3811, + "end": 5053 + }, + "embeddings": [ + 0.08132793, + -0.047893565, + -0.038560215, + -0.03994145, + 0.0558572, + -0.03973998, + 0.020470386, + 0.058355197, + 0.01980108, + 0.03896921, + 0.04879353, + -0.0074668517, + 
0.05397047, + -0.010254351, + -0.042885937, + 0.08040558, + 0.091155075, + -0.052957732, + -0.035930026, + -0.03653066, + 0.013761402, + -0.018923452, + -0.04685841, + 0.04731581, + 0.027308341, + 0.020014657, + -0.04545417, + 0.028795317, + 0.04793647, + -0.0704067, + -0.042252712, + -0.05682541, + 0.066968046, + -0.09382263, + 0.02506045, + 0.019845745, + -0.015298284, + -0.044756494, + 0.032255, + -0.03357616, + -0.01634103, + 0.012012115, + 0.05378444, + 0.036496706, + 0.06764162, + 0.08833494, + -0.021727582, + 0.0363613, + -0.08750663, + -0.006557421, + -0.037404615, + 0.083952226, + -0.005245814, + 0.06731529, + 0.027517168, + -0.069114335, + -0.06600843, + -0.055819273, + -0.09175115, + 0.0908832, + 0.045391496, + -0.03755004, + 0.0018628142, + 0.015974216, + 0.034663454, + 0.07421443, + 0.045072228, + -0.0134752095, + -0.053152926, + -0.011296686, + -0.052672, + 0.064373136, + -0.009546203, + -0.08377613, + -0.0018304663, + 0.023774406, + 0.029625371, + -0.07841949, + 0.025992012, + -0.034211818, + -0.04341797, + -0.074051395, + -0.022789141, + -0.014875852, + -0.050796572, + 0.08730017, + 0.09586879, + -0.06974203, + 0.048677806, + -0.04995857, + 0.038378827, + 0.06020236, + -0.060032416, + -0.05082279, + -0.08157444, + -0.05524207, + -0.09547329, + 0.061129954, + 0.07330997, + -0.060067892, + 0.08218149, + -0.011082627, + 0.041907076, + -0.0016668623, + -0.020462176, + 0.0074657737, + 0.04153701, + -0.053815063, + 0.08984907, + -0.04856424, + -0.019359102, + 0.025180845, + 0.0580883, + 0.051315922, + -0.07716719, + -0.06010258, + -0.024739653, + -0.020786842, + 0.021310974, + -0.049855735, + 0.058490653, + -0.019344086, + -0.064905055, + -0.043594714, + -0.0414785, + -0.026626132, + 0.010384775, + 0.035636406, + 0.023757294, + 0.02353357, + 0.038512193, + 0.043469686, + 0.025641369, + 0.06005725, + 0.033108205, + 0.093584485, + -0.008513592, + 0.001993488, + 0.0266426, + -0.0135798985, + -0.058448963, + 0.030007407, + -0.03873391, + 0.012962885, + 0.03407742, + 0.052897573, + 0.048484456, + -0.0037075893, + 0.10519477, + -0.05359505, + 0.062401634, + -0.02432665, + 0.006226394, + 0.027923357, + 0.0724623, + -0.050624184, + -0.08479024, + -0.08688512, + 0.032354686, + 0.06821751, + -0.077089824, + -0.0014580968, + -0.04177363, + -0.027564395, + -0.0448798, + -0.042052064, + -0.009614605, + 0.07208001, + 7.672266E-4, + -0.075805336, + -0.05364635, + 0.06561775, + -0.032068495, + 0.04494038, + -0.044013828, + -0.0190166, + -0.022102332, + 0.034658328, + 0.050540138, + -0.01942592, + -0.020942092, + -0.02782304, + -0.065396436, + -0.04059357, + -0.049896274, + -0.0376796, + -0.043743063, + 0.040360678, + 0.07515184, + -0.018274747, + -0.009190847, + 0.055620983, + -0.041216724, + -0.073044226, + -0.05465287, + 0.010405976, + -0.013486699, + 0.02830836, + 0.06836122, + 0.020561688, + -0.01688864, + -0.020571496, + 0.04652389, + 0.020004654, + 0.060006775, + 0.00938477, + -0.05559232, + 0.08781834, + -0.025533192, + 0.052398734, + 0.057509296, + -0.09851155, + -0.09180138, + 0.038183447, + -0.06369883, + -0.054243114, + 0.020855743, + 0.10808265, + -0.04326038, + 0.023134552, + 0.088371366, + -0.03126334, + 0.044376496, + -0.07867371, + -0.03890121, + 0.051151622, + 0.037706945, + -0.03370568, + -0.008004474, + 0.041355547, + -0.023588097, + -0.026358435, + -0.04786497, + -0.108022444, + -0.04574715, + -0.03736998, + -0.048178125, + 0.034921553, + 0.06676284, + -0.060398124, + -0.024748335, + -0.02818482, + 0.02239888, + -0.07246388, + 0.04970122, + -0.010178895, + -0.010817003, + 
0.05318733, + -0.050516233, + 0.04490196, + 0.057144474, + -0.031509876, + -0.06828971, + -0.057091262, + -0.041589297, + 0.034988903, + 0.05772322, + 0.08349064, + -0.07048785, + 0.02914558, + 0.037508357, + -0.018101186, + 0.09606959, + 0.09399272, + 0.033781327, + 0.026298832, + -0.007974394, + -0.04828518, + -0.030074345, + -0.008707313, + -0.06095452, + 0.0052815387, + 0.053281322, + -0.07403459, + -0.04375484, + -0.0024250182, + 0.030269688, + 0.08677468, + -0.044580005, + -0.023698311, + 0.09059957, + 0.03502518, + 0.039508294, + 0.03801833, + 0.051657647, + -0.023771202, + -0.021416105, + 0.08418382, + -0.07468558, + -0.022965085, + -0.037451513, + -0.070336066, + 0.07278321, + -0.06958301, + 0.061745293, + 0.034864236, + -0.05098527, + 0.075577505, + -0.01925352, + 0.028659336, + -0.01881169, + -0.09233528, + 0.052659664, + 0.046592344, + -0.08144535, + 0.04045172, + 0.021832049, + 0.01539719, + 0.036698546, + 0.048459183, + 0.0750458, + 0.03523083, + -0.093105264, + -0.042830218, + 0.08817936, + 0.05500005, + -0.03145603, + 0.002137886, + -0.09369107, + -0.0859627, + -0.00988302, + -0.03224872, + 0.009135905, + -0.07538188, + 0.01729995, + 0.05211995, + -0.028220842, + -0.09644254, + 0.08197546, + 0.021641405, + 0.044149674, + -0.02265579, + -0.03705849, + 0.0066629667, + -0.038971607, + 0.0077898037, + -0.07302501, + 0.050258975, + -0.031734023, + -0.05120743, + 0.006855154, + 0.03317757, + -0.054895062, + 0.020226864, + -0.028702717, + -0.054496907, + 0.03333692, + -0.01552826, + -0.024065949, + 0.034094118, + 0.06990785, + -0.11025783, + -0.022972278, + 0.094185725, + -0.034931783, + 0.045400895, + 0.0029167728, + -0.040711746, + 0.0069749537, + 0.02316794, + -0.07623587, + -0.032300122, + 0.040407263, + 0.056106865, + 0.084427394, + 0.09241687, + -0.014235544, + -9.3176577E-4, + 0.056472927, + -0.066110075, + 0.07017728, + 0.06319923, + -0.026196225, + 0.013847319, + -0.047189496, + -0.034471143, + -0.035234082, + 0.015169919, + -0.06258794, + -0.044817522, + 0.052238535, + 0.052592035, + 0.024454227, + 0.04652183 + ] + }, + { + "offset": { + "field": "field", + "start": 5013, + "end": 6270 + }, + "embeddings": [ + 0.050837185, + -0.058507636, + -0.08578978, + -0.07158996, + 0.062322024, + -0.06394126, + 0.033397503, + 0.066029586, + 0.059980292, + 0.014527764, + 0.027411256, + -0.019332865, + 0.09169677, + -0.028353753, + -0.024152989, + 0.026958432, + 0.06263654, + -0.057214282, + -0.01730705, + -0.06580778, + 0.012587115, + -0.0013240383, + -0.034304086, + 0.07279054, + 0.03153362, + 0.022333346, + -0.019766338, + 0.01765917, + 0.018127792, + -0.031060342, + -0.035549946, + -0.055531062, + 0.020338904, + -0.102598086, + 0.01697388, + 0.01325798, + -0.05225683, + -0.028536074, + 0.018755725, + -0.03648683, + 0.0047455817, + 0.007937342, + 0.05206842, + 0.07168695, + 0.08550893, + 0.0469701, + -0.053452007, + 0.050660927, + -0.028207462, + -0.038872562, + -0.044887412, + 0.0740998, + -0.013441051, + 0.07634305, + 0.0055091325, + -0.11408244, + -0.06909077, + -0.07962894, + -0.066142306, + 0.07568293, + 0.0025674207, + -0.080196865, + -0.006201128, + 0.00818501, + 0.07924847, + 0.10414052, + 0.042439207, + 0.035281047, + -0.040974326, + -0.04297422, + -0.024786443, + 0.06963027, + -0.016090378, + -0.077486746, + 0.013267866, + 0.0382188, + 0.075773925, + -0.045972046, + 0.021897435, + -0.057650458, + -0.026901621, + -0.047625203, + 0.0012063365, + 0.025827816, + -0.023581855, + 0.059192963, + 0.06759525, + -0.06503824, + 0.051352326, + -0.04751885, + 0.06295226, + 0.03710186, 
+ -0.05161417, + -0.049769994, + -0.08769117, + -0.045511324, + -0.051784497, + 0.056573063, + 0.040720508, + -0.035331022, + 0.073139556, + -8.214206E-4, + 0.037490595, + -0.0021819966, + -0.024999384, + 0.019722067, + 0.024325203, + -0.044025563, + 0.06545914, + -0.019343818, + -0.0023573453, + 0.0018968938, + 0.06038538, + 0.02333629, + -0.06574865, + -0.027746813, + -0.025081333, + -0.014503653, + 0.02887482, + -0.034452263, + 0.07113403, + -0.03859757, + -0.06710839, + -0.0383765, + -0.06811556, + 0.0061613885, + 0.034110006, + 0.05640678, + 0.06142383, + 0.009073967, + 0.043047454, + 0.03466423, + 0.027530612, + 0.032211494, + 0.053615883, + 0.07377551, + -0.01758648, + -0.02144349, + 0.03956204, + -0.031308886, + -0.062522896, + 0.07004273, + -0.041059777, + 0.03381151, + 0.096379966, + 0.059807573, + 0.076913215, + 7.038924E-4, + 0.081829004, + -0.06641827, + 0.044492118, + -0.036664132, + 0.08141791, + 0.039923627, + 0.079390235, + -0.05483655, + -0.092164926, + -0.07556358, + 0.024775334, + 0.039525755, + -0.052411165, + -0.044712305, + -0.1251298, + 0.019936236, + -0.05971529, + -0.071407795, + -0.013429681, + 0.045429856, + 9.2904486E-7, + -0.011094936, + -0.053897448, + 0.05120436, + -0.051203646, + 0.05109921, + -3.9564449E-4, + -0.0018849113, + -0.04667166, + 0.051931337, + 0.07190472, + 0.03911436, + 0.0045251944, + -0.048008155, + -0.03397076, + -0.028034845, + -0.048654392, + -0.02667819, + -0.04844982, + 0.04652294, + 0.08667334, + -0.03595206, + 0.0059883194, + 0.04574355, + -0.049042065, + -0.0949724, + -0.0883229, + 0.022961965, + 0.0010751986, + 0.034764428, + 0.07906372, + 0.063135885, + 0.011506904, + -0.01975833, + 0.036684997, + 0.060913093, + 0.045704674, + 0.007864406, + -0.10908467, + 0.05677562, + -0.011089532, + 0.038626347, + 0.009512805, + -0.064039044, + -0.072748266, + 0.077210315, + -0.038597148, + -0.035940252, + 0.028666161, + 0.07342884, + -0.05140841, + 0.03324692, + 0.087146066, + -0.063568234, + 0.046904817, + -0.101345256, + -0.089092165, + 0.020936692, + 0.03865168, + -0.05066454, + -0.020703398, + 0.037939124, + -0.069670096, + -0.04573288, + -0.042975515, + -0.08133061, + -0.04999254, + -0.07754444, + -0.015807157, + 0.005468936, + 0.058917798, + -0.047519706, + -0.011129669, + -0.023593048, + 0.017224371, + -0.08876406, + 0.021552147, + -0.0042216736, + 3.2073245E-4, + 0.020970272, + -0.018367162, + 0.05507523, + 0.049186505, + -0.053686555, + -0.05892317, + -0.04681065, + -0.0346258, + 0.025476422, + 0.018746119, + 0.07847266, + -0.061995696, + 0.054043338, + 0.05290739, + -0.03922319, + 0.09967812, + 0.11260788, + 0.079831325, + 0.038233027, + -0.007090767, + -0.025567437, + -0.059230927, + -0.0053755366, + -0.05934471, + 0.019243969, + 0.028365586, + -0.092337005, + -0.042283885, + -0.02478212, + 0.036973756, + 0.06046009, + -0.08319817, + -0.03466979, + 0.0052572396, + 0.03651634, + 0.0098519325, + 0.054537416, + 0.106752776, + -0.03245272, + -0.021710223, + 0.067718424, + -0.0716523, + -0.0467586, + 0.04351528, + -0.06902318, + 0.0840498, + -0.06641164, + 0.049778968, + 0.068722665, + 0.006945258, + 0.052571226, + -0.018321687, + 0.08851911, + -0.06484523, + -0.05621622, + 0.0138798375, + 0.062657684, + -0.044570502, + 0.04102728, + 0.018748704, + -0.00942585, + 0.031132046, + 0.028199397, + 0.04842188, + 0.05593715, + -0.059101623, + -0.06402159, + 0.098776296, + 0.02233127, + -0.026724212, + -0.0065241847, + -0.04349072, + -0.034313653, + 0.0035007112, + -0.05192231, + -0.038924325, + -0.06474185, + 0.015219527, + 0.015206849, + 
-0.006182916, + -0.047223445, + 0.03093224, + 0.0028494631, + 0.029578412, + -0.03084317, + -0.064933576, + 0.04518858, + -0.039695684, + 0.00936517, + -0.057235852, + 0.07411994, + -0.03560979, + -0.058608506, + 0.011952328, + 0.038545735, + -0.0027342425, + 0.034396514, + -0.05941442, + -0.059142824, + 0.07352255, + -0.043796647, + -0.02323201, + 0.021158574, + 0.04281619, + -0.06509553, + 0.025277078, + 0.028309572, + -0.025768865, + 0.017667482, + -0.054695044, + -0.0071169212, + 0.024850225, + 0.045802698, + -0.06463908, + -0.06887592, + 0.015381043, + 0.07519754, + 0.057192106, + 0.04958389, + -0.0055669746, + 0.011448934, + 0.03116414, + -0.047596138, + 0.0854336, + 0.04283707, + -0.0740198, + 0.012606065, + -0.06125597, + -0.051641334, + -0.08642954, + 0.051201824, + -0.06496548, + -0.052257292, + 0.042111978, + 0.06265747, + 0.020205691, + 0.030658716 + ] + }, + { + "offset": { + "field": "field", + "start": 6143, + "end": 7446 + }, + "embeddings": [ + 0.0424085, + -0.034002542, + -0.03464202, + -0.050363787, + 0.07952863, + -0.06934173, + 0.032258246, + 0.0323823, + 0.058361948, + 0.024646914, + 0.033364307, + 0.014893917, + 0.082809135, + -0.029873388, + -0.029152617, + 0.04554002, + 0.0795821, + -0.036626082, + -0.0474332, + -0.07305637, + 0.013581792, + -0.004326934, + -0.014103911, + 0.034649894, + -0.0026006806, + 0.02861443, + -0.04941399, + 0.04220857, + 0.03800667, + -0.08277502, + 0.0030204614, + -0.053834133, + 0.056124337, + -0.049811907, + 0.039426923, + 0.020071387, + -0.058887776, + -0.028534504, + 0.017018566, + -0.058147434, + -0.004793465, + 0.044247996, + 0.09460399, + 0.015196105, + 0.06281946, + 0.044713628, + -0.060649756, + 0.027246455, + -0.076060586, + -0.049838327, + -0.08404265, + 0.029550698, + -0.03708172, + 0.07957659, + 0.005638496, + -0.06591597, + -0.06454032, + -0.031200824, + -0.08628952, + 0.063782215, + 0.07779158, + -0.030862262, + -5.435849E-4, + 0.019658469, + 0.057832543, + 0.07795239, + 0.0381484, + -7.929322E-4, + -0.0592228, + -0.005782202, + -0.030597664, + 0.087376595, + -0.010526408, + -0.048925165, + -0.02034168, + 0.03517407, + 0.11462333, + -0.045529578, + 0.03299401, + -0.037767082, + -0.042070027, + -0.058737356, + -0.024921589, + 0.034654282, + -0.055172887, + 0.06289939, + 0.020921186, + -0.05699275, + 0.09581658, + -0.06115032, + 0.08512388, + 0.054141954, + -0.0934276, + -0.105145365, + -0.08745115, + -0.06042352, + -0.07095655, + 0.055074938, + 0.0759865, + -0.0045393603, + 0.06166128, + -0.0054426217, + -0.0013491446, + 0.020781914, + -0.013829525, + 0.012210793, + 0.0570243, + -0.026055835, + 0.050172452, + -0.0491802, + -0.03582268, + 0.0012494406, + 0.040490862, + 0.040501244, + -0.098037206, + -0.039755426, + -0.022896642, + 0.003485195, + 0.016366435, + -0.026002685, + 0.06318523, + -0.050691966, + -0.09513729, + -0.064722195, + -0.06132966, + -0.020495446, + 0.014939301, + 0.054761756, + 0.028909337, + -0.0023375573, + 0.042052656, + 0.022837669, + 0.0230999, + 0.03036407, + 0.018764673, + 0.072496034, + -0.036595833, + -0.036863085, + 0.028396215, + -0.091672495, + -0.08657466, + 0.047359336, + -0.055880774, + 0.0070424355, + 0.069609754, + 0.043904763, + 0.07389961, + -0.0059867557, + 0.116695836, + -0.03913718, + 0.036678135, + -0.010901363, + 0.08819442, + 0.03855831, + 0.07974421, + -0.051924232, + -0.10385839, + -0.033763383, + 0.019493395, + 0.050680365, + -0.058339395, + -0.02083137, + -0.08609875, + 0.017414644, + -0.063257225, + -0.056500446, + 0.023052368, + 0.04622413, + -0.018110551, + -0.007981176, + 
-0.024779806, + 0.0448911, + -0.08686634, + 0.06575812, + -0.04816167, + 0.049937073, + -0.04870519, + 0.078450456, + 0.06596584, + 0.026573703, + -0.054720048, + -0.016695132, + -0.06281992, + -0.033874605, + -0.034129698, + -0.018373003, + -0.050729766, + 0.037208032, + 0.08663066, + 0.0057553193, + 0.018936101, + 0.0683749, + -0.019277481, + -0.111216776, + -0.08299779, + 0.064380944, + -0.023994485, + 0.02228393, + 0.037532013, + 0.027998803, + 0.010780377, + -0.02866339, + 0.035218086, + 0.040947795, + 0.047251962, + 0.022822948, + -0.04361859, + 0.03929657, + -0.02838609, + 0.06326206, + 0.061787914, + -0.06487332, + -0.05326772, + 0.08467877, + -0.037987698, + -0.030701924, + 0.03693124, + 0.079549454, + -0.06695752, + 0.038511194, + 0.059876252, + -0.04255189, + 0.04926685, + -0.06254431, + -0.056073554, + 0.0059021385, + 0.06375891, + -0.028473105, + -0.020516206, + 0.053688798, + -0.0505003, + -0.013776076, + -0.056746498, + -0.074674286, + -0.036429465, + -0.078277834, + -0.033130404, + 0.026524864, + 0.010027121, + -0.052846454, + -0.03245234, + -0.0045730877, + 0.06279463, + -0.09209112, + 0.030202646, + -0.027974173, + -0.018735087, + 0.0051772078, + -0.034461137, + 0.031503055, + 0.024202514, + -0.0384219, + -0.028417397, + -0.0141932685, + -0.01493018, + 0.05603126, + 0.032856, + 0.0636288, + -0.08880921, + 0.0027978476, + 0.07799859, + -0.0328014, + 0.1109901, + 0.103224635, + 0.021524789, + 0.06495574, + 0.008971255, + -0.025503872, + -0.05471651, + -0.037969336, + -0.052947987, + 0.025896605, + 0.040142477, + -0.04655958, + -0.037604652, + -0.04057517, + 0.024616593, + 0.10586181, + -0.018084457, + -0.045486886, + 0.043346837, + 0.040528644, + 0.07145432, + 0.06723152, + 0.0444014, + 0.039035454, + -0.01685273, + 0.09862476, + -0.04053366, + -0.011219273, + 9.4339694E-4, + -0.04893209, + 0.08255836, + -0.06254635, + 0.0643953, + 0.057366677, + -0.035574544, + 0.05627519, + -0.053370558, + 0.07825556, + -0.0464488, + -0.06944344, + 0.06384285, + 0.022012226, + -0.059294943, + 0.015924655, + 0.015040029, + -0.024862552, + 0.0372234, + 0.07461155, + 0.037966266, + 0.05571149, + -0.062487237, + -0.05230138, + 0.09539987, + 0.050107345, + -0.045335423, + -0.008107003, + -0.04972419, + -0.053539097, + -0.022092147, + 0.0025375162, + -0.034666307, + -0.02502986, + -0.0051417495, + 0.051072195, + 0.0013976014, + -0.05035485, + 0.032701, + 0.029351933, + 0.030166088, + -0.056991193, + -0.05375353, + 0.046652608, + -0.0428863, + -0.029472742, + -0.052559793, + 0.091564216, + -0.080590494, + -0.0837016, + -0.019702932, + 0.039997194, + -0.006878238, + 0.03106036, + 0.0039084614, + -0.0647739, + 0.047937315, + -0.04196034, + -0.016512591, + 0.002820003, + 0.06303794, + -0.08405546, + 0.026794465, + 0.027069453, + -0.01786329, + 0.014802783, + -0.05162349, + -0.013761013, + -0.008544942, + 0.058489725, + -0.04009345, + -0.07866012, + 0.050363623, + 0.03921136, + 0.10168464, + 0.017203555, + -0.036566544, + -0.0041820146, + 0.017140131, + -0.04071419, + 0.028168127, + 0.04408699, + -0.051891476, + 0.018359438, + -0.05747516, + -0.042995404, + -0.050385248, + 0.016142845, + -0.097052485, + -0.054681405, + 0.015732065, + 0.04252675, + 0.04927429, + 0.034856237 + ] + }, + { + "offset": { + "field": "field", + "start": 7274, + "end": 8428 + }, + "embeddings": [ + 0.053351756, + -0.016210953, + -0.07376261, + -0.053941812, + 0.02817351, + -0.049927928, + 0.037769757, + 0.024953691, + 0.08055997, + 0.032674763, + 0.052936487, + 0.036146153, + 0.09430347, + -0.0028838688, + -0.007466441, + 
0.023164729, + 0.10583723, + -0.031802896, + -0.041414317, + -0.0475711, + 0.009346337, + -0.0023871146, + -0.02213494, + 0.050703954, + 0.035117928, + 0.049729533, + -0.041396488, + 0.040562224, + 0.0072581097, + -0.08263742, + -0.0562156, + -0.015488454, + 0.05251555, + -0.093467265, + 0.023409631, + 0.025775665, + -0.044880413, + -0.049109295, + 0.047048803, + 0.0037931658, + -0.0067197834, + 0.06803116, + 0.07420838, + -5.630403E-4, + 0.081702, + 0.06873878, + -0.0719469, + 0.07724739, + -0.05212626, + -0.042729367, + -0.042923346, + 0.03461211, + 0.0384493, + 0.07852812, + 0.010787158, + -0.08513074, + -0.061220147, + -0.064391315, + -0.05105939, + 0.052274473, + 0.051858254, + -0.025238348, + -0.00587187, + 0.027783165, + 0.08390886, + 0.09118287, + 0.0045411596, + -0.007192923, + -0.03402139, + -0.0055287075, + -0.023308607, + 0.048499316, + 0.0056659714, + -0.055594128, + 0.006816471, + 0.06142901, + 0.069629386, + -0.06880756, + 0.03697912, + 4.030213E-4, + -0.016491663, + -0.04839326, + -0.07392797, + 0.043547455, + -0.056421243, + 0.04223018, + 0.08332315, + -0.067911245, + 0.090487525, + -0.055714566, + 0.08206281, + 0.06703987, + -0.08389162, + -0.057403725, + -0.08070137, + -0.08085191, + -0.06221053, + 0.022357801, + 0.05380439, + -0.057247546, + 0.082033284, + -0.040765326, + 0.013981313, + -0.0040798467, + -0.026184458, + 0.041849125, + 0.0670039, + -0.054438762, + 0.05614216, + -0.042283792, + -0.011577375, + -0.005841353, + 0.053594112, + 0.046762522, + -0.052612707, + -0.057888422, + -0.041523386, + -0.024746502, + -0.0075298445, + -0.064313106, + 0.07056589, + -0.060802132, + -0.066174984, + -0.028887944, + -0.045796074, + -0.032927513, + 0.020563344, + 0.03263002, + 0.062557735, + 0.017696919, + 0.07386037, + 0.03261784, + 0.049800515, + 0.030138545, + 0.08249261, + 0.09115441, + -0.042155825, + -0.03988317, + 0.020776471, + -0.051469974, + -0.08725858, + 0.03421217, + -0.05013289, + 0.013482718, + 0.064455256, + 0.03295194, + 0.05072303, + 0.006866378, + 0.07478394, + -0.08232063, + 0.019163597, + 0.004971397, + 0.04126514, + 0.058498725, + 0.051773094, + -0.075701, + -0.10187357, + -0.04737017, + 0.024935009, + 0.05112209, + -0.06950842, + -0.043909222, + -0.08784876, + 0.024858471, + -0.09546347, + -0.066443644, + -0.039961666, + 0.038705625, + 0.024331694, + -8.98396E-4, + -0.05572306, + 0.029712915, + -0.03771733, + 0.03198425, + -0.018850418, + 0.029596135, + -0.03073546, + 0.040810063, + 0.05748256, + 0.073663406, + -4.3307795E-4, + -0.012033559, + -0.04193751, + -0.025243256, + 0.0020644036, + -0.045018397, + -0.041560806, + 0.052930553, + 0.019955857, + -0.026577184, + -0.008272473, + 0.021633727, + -0.025493031, + -0.0703225, + -0.06678734, + 0.03229182, + 0.0071383226, + 0.034542687, + 0.059906006, + 0.053990763, + -0.03435307, + -0.013460787, + 0.0066855447, + 0.06581118, + 0.03435488, + -0.013016893, + -0.06384082, + 0.04292309, + -0.01003905, + 0.07465682, + 0.041681886, + -0.09872228, + -0.073181555, + 0.06117674, + -0.037698943, + -0.04354557, + 0.015390995, + 0.016960131, + -0.08594164, + -0.0031558785, + 0.053712446, + -0.022476645, + 0.049800374, + -0.091516644, + -0.054994736, + 0.0021578616, + 0.0319539, + -0.037861917, + -0.035363257, + 0.029294293, + -0.038181435, + -0.032684956, + -0.059862334, + -0.052932844, + -0.058168415, + -0.09271316, + -0.03091905, + 0.058375362, + 0.033076484, + -0.048589885, + -0.0471485, + -0.036419317, + 0.0197355, + -0.09041303, + 0.005987353, + -0.04762716, + -0.025347468, + 0.01992799, + -0.040301844, + 
0.028963821, + 0.04351864, + -0.07274519, + -0.029667713, + 0.002675472, + -0.008265489, + 0.024745574, + 0.015290826, + 0.05244983, + -0.06499378, + 0.062229145, + 0.056773033, + -0.013647868, + 0.10126457, + 0.07742867, + 0.06907199, + 0.064441785, + -0.03506488, + -0.0027899756, + -0.043987043, + -0.049338706, + -0.06806032, + 0.025320068, + 0.07688298, + -0.037168447, + -0.015209554, + -0.04958993, + 0.029053042, + 0.078892104, + -0.05066037, + -0.030179376, + 0.047830258, + 0.05499768, + 0.04351645, + 0.052307993, + 0.044633888, + 0.020583658, + -0.033953577, + 0.095311515, + -0.0630289, + 0.007157878, + 0.038106248, + -0.035896186, + 0.082412794, + -0.029322542, + 0.09868366, + 0.055021353, + -0.0075476193, + 0.06234535, + -0.070212856, + 0.059051443, + -0.034478117, + -0.062892415, + 0.051439803, + 0.027673196, + -0.08141708, + 0.051184427, + 0.0028761302, + 0.016736014, + 0.05301783, + 0.070441864, + 0.034725133, + 0.07278133, + -0.034562826, + -0.08274096, + 0.04781931, + 0.067391045, + -0.028286146, + 0.045300007, + -0.070981935, + -0.0900906, + -0.01804769, + -0.07678485, + -0.054171197, + -0.04371682, + 0.044014435, + 0.019092314, + -0.0533041, + -0.05406611, + 0.001399687, + 0.008414226, + 0.0070721963, + -0.054595735, + -0.06279298, + 0.012740916, + -0.068271995, + -0.016297301, + -0.018569002, + 0.07028272, + -0.021509787, + -0.07611714, + 0.00775331, + 0.043958176, + -0.015166803, + 0.057754774, + -0.013378479, + -0.06428601, + 0.033813998, + -0.03535889, + -0.0053371727, + 0.022787765, + 0.0827088, + -0.12142623, + 0.0026697267, + 0.03981775, + -0.02158926, + 0.03722548, + -0.04657821, + -0.049696047, + 0.027757794, + 0.046377983, + -0.049581885, + -0.08924511, + 0.035119716, + 0.07465048, + 0.07677282, + 0.053386416, + -0.020686079, + 0.013271858, + 0.057107273, + -0.016681688, + 0.015427299, + 0.046444256, + -0.0758986, + 0.03103317, + 0.0036917871, + -0.07186075, + -0.0624062, + 0.043409187, + -0.054538824, + -0.065254256, + 0.05370674, + 0.03439175, + 0.02197341, + 0.025227817 + ] + }, + { + "offset": { + "field": "field", + "start": 8427, + "end": 9687 + }, + "embeddings": [ + 0.05744903, + -0.02452922, + -0.08476994, + -0.022428924, + 0.048399355, + -0.036132727, + -0.015275069, + 0.074007444, + 0.07940483, + 0.02248898, + 0.04316835, + -0.0034011744, + 0.08490044, + -4.1730207E-5, + -0.038465198, + 0.047819026, + 0.072968654, + -0.0597117, + 0.01257942, + -0.058731165, + 0.01321756, + 0.015429294, + -0.04443649, + 0.067764916, + 0.032255292, + 0.057302598, + -0.013705533, + -0.002871075, + -0.0017963633, + -0.076624624, + -0.04033174, + -0.03958768, + 0.021592977, + -0.083355255, + 0.02508422, + 0.014075689, + -0.042936496, + -0.069775715, + 0.053771127, + -0.06096773, + -0.038709678, + 0.030585166, + 0.06309865, + 0.0289272, + 0.070409566, + 0.075638674, + -0.039296776, + 0.02741248, + -0.041558262, + -0.009545241, + -0.060929116, + 0.017809264, + 0.04246089, + 0.092424795, + 0.0044749626, + -0.084972195, + -0.071833394, + -0.05189755, + -0.05925639, + 0.07651771, + 0.051788367, + -0.053483434, + -0.021773372, + -0.00506648, + 0.038404945, + 0.06250312, + 0.061945193, + 0.011288415, + -0.060019504, + -0.026446447, + -0.055844307, + 0.06780296, + -0.03332657, + -0.048795506, + 0.03756737, + 0.045220662, + 0.034406263, + -0.058406588, + 0.018282196, + -0.09083589, + -0.03040247, + -0.05790508, + -0.016188977, + 0.022804815, + -0.056110263, + 0.07543798, + 0.038187183, + -0.08649141, + 0.08623204, + -0.042687863, + 0.0573812, + 0.050730344, + -0.0433588, + 
-0.09344185, + -0.046142764, + -0.07739427, + -0.05609858, + 0.052337695, + 0.053889126, + -0.05016094, + 0.096083306, + 0.011468343, + 0.042769995, + 0.008584574, + -0.028934095, + 0.029772492, + 0.05292526, + -0.024594065, + 0.08542614, + -0.066132575, + -0.0076108603, + 0.0075524007, + 0.09586245, + 0.07127726, + -0.08062749, + -0.06285386, + -0.034123085, + 0.053412784, + 0.03723955, + -0.033416737, + 0.04680435, + -0.03861024, + -0.027420327, + -0.081069514, + -0.059449777, + -0.023693249, + 0.023154624, + 0.052628066, + 0.053673804, + 0.03851477, + 0.048254706, + 0.040450633, + 0.024582013, + 0.030465266, + 0.07089921, + 0.087507665, + -0.009536147, + -0.014239722, + 0.0023720453, + -0.03707558, + -0.025194108, + 0.08157714, + -0.03958548, + 0.051691998, + 0.06314976, + 0.02721075, + 0.052713513, + -0.023559293, + 0.06393838, + -0.07106552, + 0.044660386, + -0.025641244, + 0.06264186, + 0.014594412, + 0.048385747, + -0.055564065, + -0.06955722, + -0.088032804, + 0.034305904, + 0.045169048, + -0.03802287, + -0.013604237, + -0.08036378, + 0.022200659, + -0.055803996, + -0.084766835, + -0.03537992, + 0.0466811, + -0.01768934, + -0.04932191, + -0.028891142, + 0.0119931735, + -0.030645167, + 0.02563793, + 0.011760058, + 0.02289236, + -0.052902717, + 0.0097223595, + 0.042422734, + 0.020096473, + 0.0088921515, + -0.013737467, + -0.03993987, + -0.05381494, + -0.04218381, + -0.03449234, + -0.054990627, + 0.009642538, + 0.05949224, + -0.007698366, + 0.027766742, + 0.031446908, + -0.08122337, + -0.038493186, + -0.06830541, + 0.020205725, + -0.030477056, + 0.044251017, + 0.08096215, + 0.10125872, + -0.009518375, + -0.018208051, + 0.04083479, + 0.021746838, + 0.030360037, + 0.0030146895, + -0.04425533, + 0.063152075, + -0.040584363, + 0.07283654, + 0.062402766, + -0.072093405, + -0.07191966, + 0.041823577, + 0.004934987, + -0.037696403, + 0.032516938, + 0.072518826, + -0.06659665, + -0.006708449, + 0.07320258, + -0.028489655, + 0.0686214, + -0.07320168, + -0.03665047, + 0.020025352, + 0.018766245, + -0.025394067, + -0.043893065, + 0.013678436, + -0.0817917, + -0.02630837, + -0.03421568, + -0.0654703, + -0.042911462, + -0.07311668, + -0.0038604182, + 0.016762605, + 0.021780867, + -0.06629608, + -0.012976095, + -0.051092017, + 0.011383003, + -0.11568767, + 0.056158062, + -0.011376737, + 0.020621011, + 0.015717132, + -0.01347594, + 0.018848866, + 0.039923675, + -0.06502122, + -0.044894896, + -0.032492988, + -0.035042934, + 0.045391146, + 0.047973733, + 0.10662139, + -0.056172207, + 0.031413678, + 0.0125645455, + -0.003751948, + 0.07743928, + 0.084872924, + 0.047170028, + 0.046972826, + -0.00976389, + -0.032883007, + -0.054116864, + -0.027746534, + -0.08914457, + 0.007070583, + 0.04398771, + -0.0475649, + -0.06489332, + -0.060108416, + 0.0143431965, + 0.05955711, + -0.0774654, + -0.030995058, + 0.05263145, + 0.029864812, + 0.01608842, + 0.09080374, + 0.05185686, + -0.032855753, + 0.0063909087, + 0.0853062, + -0.10142854, + -0.07251046, + -0.005085327, + -0.066178784, + 0.046009053, + -0.09079122, + 0.08566233, + 0.06576406, + 0.017733688, + 0.06487284, + -0.039741356, + 0.04176326, + -0.010695733, + -0.050619148, + 0.01245912, + 0.03467508, + -0.06871932, + 0.030141022, + 0.026552299, + -0.0035028423, + 0.030276356, + 0.05361378, + 0.054491397, + 0.06513585, + -0.08491482, + -0.051875558, + 0.086067244, + 0.0396853, + -0.054731067, + 0.016796874, + -0.036002953, + -0.0658579, + -0.016491668, + -0.0758324, + -0.039184928, + -0.068875834, + 0.031522863, + 0.009083638, + -0.024529556, + -0.059996516, + 
0.06894157, + -0.033383097, + -0.002836109, + -0.044933245, + -0.09211297, + 0.075231805, + -0.09029687, + -0.0025871666, + -0.02342682, + 0.06138579, + -0.052864984, + -0.078638926, + 0.020620788, + 0.011810836, + -0.014471281, + 0.01986825, + -0.040482074, + -0.06512211, + 0.062289387, + -0.03012425, + -4.7029057E-4, + 0.035347983, + 0.11894842, + -0.050478995, + -0.014397127, + 0.049630538, + -0.01540003, + 0.052197892, + -0.048483927, + -0.0076621673, + 0.04089758, + 0.015284395, + -0.023267174, + -0.0582655, + 0.035793785, + 0.06800681, + 0.11031594, + 0.10364201, + -0.042768136, + 0.03487297, + 0.03780645, + -0.040866226, + 0.046048936, + 0.029865082, + -0.04171421, + 0.03842289, + -0.0154759055, + -0.020621978, + -0.05873017, + 0.05175785, + -0.03108134, + -0.08132814, + 0.04200817, + 0.05092204, + 0.02828486, + 0.06530922 + ] + }, + { + "offset": { + "field": "field", + "start": 9554, + "end": 10460 + }, + "embeddings": [ + 0.06795254, + -0.027788855, + -0.06532636, + -0.05325019, + 0.05093753, + -0.055382267, + 0.051724233, + 0.03824768, + 0.07362302, + 0.04002248, + 0.0550001, + -4.6239374E-4, + 0.09122236, + 0.026436811, + -0.06672792, + 0.011183016, + 0.097761884, + -0.082106106, + -0.028012855, + -0.062181316, + -0.008753627, + -0.019600896, + -0.036626942, + 0.03447432, + 0.06013969, + 0.05223775, + -0.016101984, + -0.010203473, + 0.025855985, + -0.060056984, + -0.03937554, + -0.043756496, + 0.030997807, + -0.10395428, + 0.027634699, + 0.005324417, + -0.024001809, + -0.019012816, + 0.057175636, + -0.04684799, + -0.053544424, + 0.010870207, + 0.029463693, + 0.012833155, + 0.09024689, + 0.07413883, + -0.032765836, + 0.015112767, + -0.026345447, + -0.061428167, + -0.03721613, + 0.049783345, + -0.0010639617, + 0.09159631, + 0.02264281, + -0.055135295, + -0.05914746, + -0.052138176, + -0.07942767, + 0.029073795, + 0.02702419, + -0.0532197, + -0.014727404, + 6.2745955E-4, + 0.029936861, + 0.07468935, + 0.017335532, + 0.059831787, + -0.049344696, + 0.01880937, + -0.04200233, + 0.067229606, + -0.0012889965, + -0.0632363, + 0.043949638, + 0.049309365, + 0.036185548, + -0.058062393, + 0.04830483, + -0.02813847, + -0.06845039, + -0.046040177, + 0.0015298559, + 0.0377331, + -0.028661495, + 0.077781945, + 0.050341487, + -0.060820885, + 0.07588156, + -0.034051448, + 0.0768756, + 0.048993148, + -0.066278465, + -0.09077045, + -0.06511732, + -0.04687162, + -0.06367129, + 0.04149166, + 0.07077744, + -0.041742414, + 0.054795545, + -0.029036827, + 0.06274923, + 0.0061874306, + -0.0317641, + 0.0038963803, + 0.056757834, + -0.043710202, + 0.06571763, + -0.039196618, + 0.011638405, + -0.014480316, + 0.0782259, + 0.041573986, + -0.056704924, + -0.044138405, + -0.032456245, + 0.025932135, + 0.044077054, + -0.007750241, + 0.054932345, + -0.03292227, + -0.028155934, + -0.08390399, + -0.044745676, + -0.039294515, + 0.020936523, + 0.059180506, + 0.09919356, + 0.04366707, + 0.02902992, + 0.041400306, + 0.009594294, + -0.0058838082, + 0.042111326, + 0.12996204, + -0.038631447, + -0.019210441, + 0.056625124, + -0.057970613, + -0.08764153, + 0.08361837, + -0.016109295, + 0.030824538, + 0.061104048, + 0.05500983, + 0.045061268, + -0.055872414, + 0.08214088, + -0.046806127, + 0.057676565, + -0.055537637, + 0.072990045, + 0.045658644, + 0.06032115, + 0.0016026857, + -0.08040042, + -0.082738034, + 0.021192942, + 0.06619772, + -0.060728885, + -0.012204158, + -0.05736885, + 0.011759795, + -0.09559732, + -0.03487954, + -0.004853385, + 0.07568596, + 0.0170426, + -0.032266848, + -0.034448244, + 0.0015031097, + 
-0.051096436, + 0.067675546, + -0.008337999, + 0.02016469, + -0.034166988, + 1.3699784E-4, + 0.036702186, + 0.03628234, + -0.034941807, + 0.00841879, + -0.034299497, + -0.045383744, + -0.021920165, + -0.037155207, + -0.012305447, + -0.018064288, + 0.041540947, + -0.013256499, + -0.01824263, + 0.027535202, + -0.10648625, + -0.10162097, + -0.08293666, + 0.048940066, + -0.008739751, + 0.03177586, + 0.06314134, + 0.08493229, + -0.03178613, + -0.04156539, + 0.021325408, + 0.015963139, + 0.030367697, + 0.0012957318, + -0.054147527, + 0.049582303, + -0.058355026, + 0.059193954, + 0.080090895, + -0.0643068, + -0.078586616, + 0.061390623, + -0.035683163, + -0.05697204, + 0.016509915, + 0.0456678, + -0.06869852, + -0.021377739, + 0.0459535, + -0.034752127, + 0.044991855, + -0.07434412, + -0.061407465, + 0.0066419234, + 0.04971079, + -0.020545505, + -0.03348485, + 0.072175615, + -0.08419868, + -0.06409017, + -0.028774735, + -0.08295683, + -0.067340076, + -0.052633435, + -0.02234827, + -0.0048523103, + 0.036146127, + -0.041977044, + -0.04563754, + -0.056249525, + 0.040496923, + -0.07444201, + 0.042330384, + -0.034291748, + -0.037762616, + 0.035350475, + -0.023694497, + 0.044436026, + 0.034658603, + -0.071404554, + -0.052793555, + -0.030472925, + -0.023625389, + 0.043577187, + 0.05148583, + 0.09133436, + -0.054247066, + 0.04203479, + 0.030377146, + 0.0089587355, + 0.08783934, + 0.102596834, + 0.004045215, + 0.041863658, + -0.049759816, + -0.041472945, + -0.06560168, + -0.049456153, + -0.06286144, + -0.0021516692, + 0.06415723, + -0.057984285, + -0.052246314, + -0.0468379, + 0.005024449, + 0.063394494, + -0.049811874, + -0.007827677, + 0.043182477, + 0.03432028, + 0.059190553, + 0.051201522, + 0.06459717, + -0.0028205558, + 0.011427307, + 0.07478203, + -0.09011506, + -0.06896538, + 0.015105613, + -0.06902061, + 0.048208747, + -0.076154344, + 0.05893959, + 0.026351677, + -0.013113587, + 0.038620975, + -0.020734645, + 0.042907227, + -0.02616936, + -0.012401203, + 0.036476728, + 0.031379998, + -0.07657323, + 0.07557042, + 0.017815659, + 0.057302337, + 0.031211596, + 0.041240353, + 0.06864739, + 0.056433342, + -0.05830147, + -0.027380649, + 0.054324336, + 0.07243749, + -0.044019613, + 0.0029616277, + -0.061004672, + -0.06978305, + -0.055067733, + -0.06398177, + -0.025761655, + -0.031062664, + 0.038432557, + 0.01983404, + -0.022323918, + -0.08653916, + 0.036503706, + 0.027113369, + 0.051526625, + 0.003591905, + -0.043091606, + 0.048455648, + -0.06892166, + -0.007492171, + -0.018578587, + 0.05494636, + -0.05301073, + -0.094928786, + 0.003945227, + 0.033395912, + -0.034273494, + 0.06995625, + -0.024217183, + -0.06057119, + 0.022178173, + -0.048596364, + -0.03847148, + 0.01584574, + 0.08880866, + -0.09683496, + 0.040496774, + 0.0554991, + -0.0325551, + 0.066031836, + -0.07693793, + -0.014788223, + 0.013764252, + 0.04855808, + -0.037729017, + -0.037790805, + 0.033332434, + 0.09727558, + 0.09606235, + 0.07886385, + -0.017046498, + -0.0047775926, + 0.049902774, + -0.06325739, + 0.032437313, + 0.054471422, + -0.06110438, + 0.020669593, + 0.0070950463, + -0.026809083, + -0.05658399, + 0.048453655, + -0.048016146, + -0.047978207, + 0.046292298, + 0.046507128, + 0.022924135, + 0.07091171 + ] + }, + { + "offset": { + "field": "field", + "start": 10459, + "end": 11696 + }, + "embeddings": [ + 0.053117193, + -0.015585508, + -0.05423901, + -0.05138859, + 0.06471939, + -0.07901192, + 0.01693148, + 0.050192464, + 0.09575295, + 0.043414578, + 0.011923588, + -0.009796319, + 0.05132375, + -0.014788656, + -0.025382983, + 
0.028342921, + 0.06872216, + -0.055240728, + -0.018316492, + -0.053359665, + 0.013118909, + -0.01603142, + -0.05637189, + 0.060144503, + 0.078957014, + 0.052481424, + -0.063893974, + 0.012951693, + 0.037284218, + -0.0989329, + -0.031654015, + -0.018283853, + 0.048968345, + -0.04817539, + 0.026837517, + 6.222096E-5, + -0.024189027, + 0.0112748975, + 0.0207289, + -0.012949756, + -0.03303762, + 0.04864192, + 0.028754367, + 0.025379542, + 0.047412705, + 0.051565237, + -0.057438288, + 0.032263443, + -0.06824788, + -0.019407846, + -0.059993997, + 0.06091319, + -0.0069746193, + 0.099794194, + -6.160557E-4, + -0.09514187, + -0.09679237, + -0.048262954, + -0.047733244, + 0.09921752, + 0.0332093, + -0.04550775, + -0.05143887, + 0.014637188, + 0.07129097, + 0.09510039, + 0.032291826, + 0.034658313, + -0.017751144, + -0.044415683, + -0.05508973, + 0.067787856, + -0.031251505, + -0.044856634, + -0.0033598887, + 0.04760263, + 0.054377872, + -0.040341455, + 0.018044798, + -0.023554679, + -0.049902994, + -0.031270802, + -0.007537713, + 0.03402409, + -0.027234826, + 0.09944215, + 0.045163024, + -0.05408758, + 0.04893289, + -0.048714437, + 0.061802126, + 0.06499505, + -0.07351746, + -0.08406793, + -0.08679661, + -0.051905084, + -0.034713045, + 0.040123433, + 0.07967649, + -0.06041734, + 0.061612148, + -0.020921662, + 0.05665623, + 0.041377034, + -0.007348656, + 0.015952924, + 0.05296665, + -0.052709162, + 0.071244985, + -0.038275376, + -0.01164368, + 0.014391718, + 0.06113161, + 0.034303535, + -0.050069015, + -0.070354894, + -0.011464902, + -0.028307518, + 0.04133397, + -0.049779277, + 0.08302818, + -0.048584647, + -0.06805662, + -0.04735593, + -0.04913521, + -0.005428242, + 0.03233016, + 0.044904802, + 0.06872594, + -0.01780296, + 0.06279163, + 0.039817583, + 0.007986946, + 0.0121078305, + 0.074653216, + 0.12367899, + -0.037977446, + -0.02724532, + 0.021000944, + -0.07356985, + -0.06435206, + 0.013165806, + -0.004956233, + 0.038697783, + 0.0691664, + 0.041731402, + 0.06331449, + -0.0046027564, + 0.078827925, + -0.028814215, + 0.02115456, + -0.030129815, + 0.071896814, + 0.005554397, + 0.060193166, + -0.016858125, + -0.067393966, + -0.06267468, + 0.019814175, + 0.045483287, + -0.093514144, + -0.026769608, + -0.091787525, + 0.019846648, + -0.092779495, + -0.04158812, + 0.01915316, + 0.03254872, + -9.048901E-4, + 0.014318523, + -0.013507805, + 0.040734824, + -0.047447592, + 0.052798737, + -0.04318385, + 0.020014195, + -0.05593343, + 0.045916248, + 0.059790693, + 0.032657895, + -0.038109995, + -0.006277211, + -0.042986337, + -0.090472385, + -0.003131573, + -0.0031761206, + -0.047034763, + 0.023323804, + 0.06837014, + -0.0039844033, + 0.002091333, + 0.021284567, + -0.052262813, + -0.082303464, + -0.052592263, + 0.032273255, + 0.009803746, + 0.0013473934, + 0.05583177, + 0.06429418, + 0.001517824, + -0.0072332155, + 0.020265736, + 0.0033951101, + 0.058746524, + 0.022550192, + -0.07356808, + 0.045694035, + -0.018434413, + 0.025329743, + 0.061233632, + -0.07960079, + -0.08406201, + 0.06330057, + -0.09599598, + -0.025174508, + 0.028187213, + 0.067423336, + -0.06526651, + 0.006772753, + 0.07572871, + -0.031995185, + 0.05916029, + -0.047238894, + -0.06869731, + 0.032157637, + 0.07589699, + -0.043760736, + -0.013728161, + 0.053143077, + -0.021147272, + -0.049572222, + -0.04899867, + -0.08653491, + -0.07939669, + -0.10523086, + -0.001059735, + 0.0063834176, + 0.015314108, + -0.05562784, + -0.056119584, + -0.08270628, + -3.2258706E-4, + -0.083571695, + 0.052503087, + -0.03977744, + -0.047228433, + 0.07893777, + 
-0.017052159, + 0.035709318, + 0.04999642, + -0.100927435, + -0.025071207, + -0.046351615, + -0.026675632, + 0.025651569, + 0.068944395, + 0.031405594, + -0.07291537, + 0.018124148, + 0.018039903, + -0.034970153, + 0.10088425, + 0.09433116, + 0.0689122, + 0.049935102, + -0.02560692, + -0.06034739, + -0.060196366, + -0.02504903, + -0.058731087, + 0.04172741, + 0.0038506123, + -0.035828065, + -0.018227967, + -0.07467086, + 0.037910078, + 0.054497574, + -0.07775332, + -0.017336372, + 0.046693277, + 0.022060173, + 0.036212, + 0.046006728, + 0.027395774, + -0.020391421, + -0.029180788, + 0.05312558, + -0.072161354, + 0.016918298, + -0.025024151, + -0.031315047, + 0.08490075, + -0.03845013, + 0.047562983, + 0.021411635, + -0.023305604, + 0.039255943, + -0.026875794, + 0.08610026, + -0.029386222, + -0.016845187, + 0.054429937, + 0.027040144, + -0.06772479, + 0.095606916, + -0.036488708, + 7.7485084E-4, + 0.037060957, + 0.07791183, + 0.07910346, + 0.013702623, + -0.03475561, + -0.040810455, + 0.0774657, + 0.043717105, + -0.05542658, + 0.0030442073, + -0.1050271, + -0.07705069, + -0.029897174, + -0.03622423, + -0.044971265, + -0.06206865, + 0.019566234, + 0.023725986, + 0.010738356, + -0.07149888, + 0.002263669, + 0.023846326, + 0.037898906, + 0.008864181, + -0.03436943, + 0.03523395, + -0.061920922, + -0.022051072, + -0.07035821, + 0.09721548, + -0.047868855, + -0.09084715, + 0.01050229, + 0.06422868, + 0.02094103, + 0.038811173, + -0.023608131, + -0.04335279, + 0.053133078, + -0.021861738, + -0.039793, + 0.048549335, + 0.07316228, + -0.08636803, + 0.017843066, + 0.06287863, + -0.034799643, + 0.06658666, + -0.042144388, + -0.025062915, + -0.005463377, + 0.024971562, + -0.05923357, + -0.041639276, + 0.039569613, + 0.06571587, + 0.096652776, + 0.061983064, + -0.036815662, + 0.0028833281, + 0.05262061, + -0.05568962, + 0.05730981, + 0.08141181, + -0.030994864, + 0.020174727, + -0.06336232, + -0.012292672, + -0.02354779, + 0.03636813, + -0.062137593, + -0.06593778, + 0.008968277, + 0.08741745, + -0.0025689485, + 0.043467455 + ] + }, + { + "offset": { + "field": "field", + "start": 11635, + "end": 12404 + }, + "embeddings": [ + 0.060443457, + -0.018814357, + -0.073390484, + -0.072757736, + 0.07602336, + -0.08031318, + 0.049980927, + 0.09587944, + 0.051309362, + 0.06949769, + -0.0072211474, + -0.015413545, + 0.07187972, + -0.027608033, + -0.050633453, + 0.0033393581, + 0.08592932, + -0.05384897, + 0.0090165865, + -0.067287035, + 0.047661647, + -0.034592163, + -0.039346103, + 0.0146116605, + 0.071183585, + 0.08697948, + -0.024614712, + 0.028339177, + 0.022019284, + -0.09268575, + -0.019253781, + -0.041030932, + 0.007305104, + -0.07492374, + 0.053613797, + 0.01886426, + -0.04425684, + -0.024854647, + 0.031085161, + -0.017220812, + -0.020209908, + 0.05369729, + 0.03976705, + 0.029386787, + 0.050238505, + 0.054753933, + -0.06296793, + -0.0058290027, + -0.03582435, + -0.017357286, + -0.02952249, + 0.08404001, + 0.005996583, + 0.07228626, + 0.0453729, + -0.100141585, + -0.092281535, + -0.04168767, + -0.10046059, + 0.1075754, + 0.024743102, + -0.056973584, + -0.035330076, + 0.011421968, + 0.030275127, + 0.09172467, + 0.017434414, + 0.015847225, + -0.0726862, + -0.06845117, + -0.055281464, + 0.067980886, + -3.857295E-4, + -0.054992713, + 0.0075252233, + 0.047023434, + 0.035014, + -0.012921049, + 0.02088017, + -0.058113724, + -0.025709266, + -0.054443315, + -0.019515503, + 0.064740464, + -0.05698313, + 0.09463141, + 0.04497404, + -0.049769837, + 0.0833754, + -0.029804397, + 0.048232727, + 0.06960264, + 
-0.0549942, + -0.052367542, + -0.053988345, + -0.043551333, + -0.0440573, + 0.0228508, + 0.06116432, + -0.034144748, + 0.07046748, + 0.030397533, + 0.08092524, + 0.01595668, + -0.040372074, + 0.005287498, + 0.08518292, + -0.085493654, + 0.07491553, + -0.057637572, + -0.052299142, + 0.025728408, + 0.061017167, + 0.062338777, + -0.025426613, + -0.046602402, + -0.00770177, + -0.022468466, + 0.04037256, + -0.027729545, + 0.049696933, + -0.04159955, + -0.03250282, + -0.05583671, + -0.057482447, + -0.013210838, + 0.010765793, + -6.568303E-4, + 0.01951912, + 0.0042298064, + 0.06481922, + 0.051263362, + 0.03024384, + 0.0143968, + 0.057499222, + 0.09256815, + -0.057120778, + 0.0056950618, + 0.009500937, + -0.085512474, + -0.062135834, + 0.03066087, + -0.022205362, + 0.04599781, + 0.03531616, + 0.033788696, + 0.092292726, + -0.010158623, + 0.080052234, + -0.0060746367, + 0.05273896, + -0.017564675, + 0.057575084, + -0.005175612, + 0.030423889, + -0.04613064, + -0.067384765, + -0.0777474, + 0.050731033, + 0.06055307, + -0.057881925, + -0.05091726, + -0.09201947, + 0.004303206, + -0.055900693, + -0.0481762, + 0.016243042, + 0.027040469, + -0.0034547276, + -0.049395755, + -0.011644979, + 0.080957845, + -0.058048993, + 0.0492391, + -0.0063328324, + -2.4730185E-4, + -0.07200027, + 0.09804746, + 0.048087306, + 0.048413623, + -0.043330252, + -0.008462916, + -0.046491988, + -0.070683904, + -0.04057368, + 0.0125348065, + -0.059007447, + 0.038095772, + 0.041568384, + -0.024388209, + -0.013926745, + -0.009171631, + -0.05361981, + -0.04222372, + -0.02127755, + 0.051767804, + 0.0061274986, + 0.053409755, + 0.035079833, + 0.027605304, + -0.013933335, + -0.031408813, + 0.005120374, + 0.020053213, + 0.039294656, + -0.005544306, + -0.10680002, + 0.034485042, + -0.02083935, + 0.04792578, + 0.068743885, + -0.11507496, + -0.10216752, + 0.056888673, + -0.06800507, + -0.059926618, + 0.008200659, + 0.030621173, + -0.059572708, + 0.020859051, + 0.11224187, + -0.026253646, + 0.05764227, + -0.047062173, + -0.056426648, + -0.018189454, + 0.06514884, + -0.060741644, + -0.039313216, + 0.011433455, + -0.038083345, + -0.05282726, + -0.052797362, + -0.08434047, + -0.06285792, + -0.058861967, + -0.059305865, + 0.004766285, + 0.06191272, + -0.061296433, + -0.05848144, + -0.038482025, + 0.033259515, + -0.11248364, + 0.017305091, + -0.024461089, + -0.03555484, + 0.0663307, + -0.014705792, + 0.014617273, + 0.04280535, + -0.074889824, + -0.052947134, + -0.030037379, + -0.0077148285, + 0.057981927, + 0.047073305, + 0.06093273, + -0.05974137, + 0.064445026, + 0.029471356, + 9.505361E-4, + 0.08606595, + 0.086340785, + 0.046603594, + 0.05858932, + -0.032077473, + -0.019977393, + -0.03431287, + -0.047011334, + -0.056820385, + 0.013462027, + 0.042805202, + -0.0677109, + -0.023264, + -0.05485633, + 0.055690948, + 0.05651245, + -0.071241796, + -0.00915478, + 0.053730387, + 0.056755595, + 0.03846687, + 0.05310068, + 0.0056861867, + 0.005360462, + -0.039538994, + 0.03497575, + -0.06781882, + -0.011839252, + -0.042778153, + -0.041556057, + 0.034042798, + -0.04711682, + 0.034821853, + 0.043264013, + 0.01696988, + 0.048099734, + -0.019220253, + 0.11707801, + -0.054026723, + -0.018543418, + 0.03708661, + 0.032689948, + -0.05068468, + 0.0691382, + -8.202863E-4, + -0.0019999356, + 0.014463376, + 0.07405446, + 0.07647231, + 0.011515665, + -0.034625884, + -0.055066366, + 0.058186855, + 0.024419852, + -0.031584322, + -0.029543389, + -0.07736987, + -0.07829203, + -0.0053546038, + -0.06570245, + -0.010904349, + -0.06490494, + 0.03149016, + 0.009859199, + 
-0.052703284, + -0.06884288, + 0.02139801, + 0.026021063, + 0.07129519, + -0.022009108, + -0.041722506, + 0.07192836, + -0.01098611, + -0.0027591465, + -0.04109422, + 0.046898007, + -0.036396515, + -0.090931825, + 0.022607114, + 0.030457368, + -0.03448933, + 0.021621844, + -0.048675153, + -0.033822104, + 0.057124067, + -0.048874307, + -0.054428495, + 0.056644462, + 0.08236624, + -0.08848753, + 0.033674665, + 0.053105284, + -0.019323535, + 0.05071626, + -0.025313161, + 0.0112252515, + 0.018250609, + 0.07660853, + -0.054870043, + -0.07221262, + 0.018527014, + 0.073324844, + 0.061312377, + 0.052779585, + -0.017265014, + 0.0044537727, + 0.054682422, + -0.055878393, + 0.064048916, + 0.03346393, + -0.028677322, + 0.050278228, + -0.02905618, + -0.038899165, + -0.03864768, + 0.042319845, + -0.06788429, + -0.06178736, + 0.06868577, + 0.05830974, + 0.02179902, + 0.052530885 + ] + } + ] + } + }, + "field": "CHAPTER 1\n\n\n\nLOOMINGS\n\n\n\nCall me Ishmael. Some years ago--never mind how long precisely--having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. It is a way I have of driving off the spleen, and regulating the circulation. Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul; whenever I find myself involuntarily pausing before coffin warehouses, and bringing up the rear of every funeral I meet; and especially whenever my hypos get such an upper hand of me, that it requires a strong moral principle to prevent me from deliberately stepping into the street, and methodically knocking people's hats off--then, I account it high time to get to sea as soon as I can. This is my substitute for pistol and ball. With a philosophical flourish Cato throws himself upon his sword; I quietly take to the ship. There is nothing surprising in this. If they but knew it, almost all men in their degree, some time or other, cherish very nearly the same feelings towards the ocean with me.\n\nThere now is your insular city of the Manhattoes, belted round by wharves as Indian isles by coral reefs--commerce surrounds it with her surf. Right and left, the streets take you waterward. Its extreme down-town is the battery, where that noble mole is washed by waves, and cooled by breezes, which a few hours previous were out of sight of land. Look at the crowds of water-gazers there.\n\nCircumambulate the city of a dreamy Sabbath afternoon. Go from Corlears Hook to Coenties Slip, and from thence, by Whitehall, northward. What do you see?--Posted like silent sentinels all around the town, stand thousands upon thousands of mortal men fixed in ocean reveries. Some leaning against the spiles; some seated upon the pier-heads; some looking over the bulwarks of ships from China; some high aloft in the rigging, as if striving to get a still better seaward peep. But these are all landsmen; of week days pent up in lath and plaster--tied to counters, nailed to benches, clinched to desks. How then is this? Are the green fields gone? What do they here?\n\nBut look! here come more crowds, pacing straight for the water, and seemingly bound for a dive. Strange! Nothing will content them but the extremest limit of the land; loitering under the shady lee of yonder warehouses will not suffice. No. They must get just as nigh the water as they possibly can without falling in. And there they stand--miles of them--leagues. 
Inlanders all, they come from lanes and alleys, streets and avenues--north, east, south, and west. Yet here they all unite. Tell me, does the magnetic virtue of the needles of the compasses of all those ships attract them thither?\n\nOnce more. Say, you are in the country; in some high land of lakes. Take almost any path you please, and ten to one it carries you down in a dale, and leaves you there by a pool in the stream. There is magic in it. Let the most absentminded of men be plunged in his deepest reveries--stand that man on his legs, set his feet a-going, and he will infallibly lead you to water, if water there be in all that region. Should you ever be athirst in the great American desert, try this experiment, if your caravan happen to be supplied with a metaphysical professor. Yes, as every one knows, meditation and water are wedded for ever.\n\nBut here is an artist. He desires to paint you the dreamiest, shadiest, quietest, most enchanting bit of romantic landscape in all the valley of the Saco. What is the chief element he employs? There stand his trees, each with a hollow trunk, as if a hermit and a crucifix were within; and here sleeps his meadow, and there sleep his cattle; and up from yonder cottage goes a sleepy smoke. Deep into distant woodlands winds a mazy way, reaching to overlapping spurs of mountains bathed in their hill-side blue. But though the picture lies thus tranced, and though this pine-tree shakes down its sighs like leaves upon this shepherd's head, yet all were vain, unless the shepherd's eye were fixed upon the magic stream before him. Go visit the Prairies in June, when for scores on scores of miles you wade knee-deep among Tiger-lilies--what is the one charm wanting?--Water--there is not a drop of water there! Were Niagara but a cataract of sand, would you travel your thousand miles to see it? Why did the poor poet of Tennessee, upon suddenly receiving two handfuls of silver, deliberate whether to buy him a coat, which he sadly needed, or invest his money in a pedestrian trip to Rockaway Beach? Why is almost every robust healthy boy with a robust healthy soul in him, at some time or other crazy to go to sea? Why upon your first voyage as a passenger, did you yourself feel such a mystical vibration, when first told that you and your ship were now out of sight of land? Why did the old Persians hold the sea holy? Why did the Greeks give it a separate deity, and own brother of Jove? Surely all this is not without meaning. And still deeper the meaning of that story of Narcissus, who because he could not grasp the tormenting, mild image he saw in the fountain, plunged into it and was drowned. But that same image, we ourselves see in all rivers and oceans. It is the image of the ungraspable phantom of life; and this is the key to it all.\n\nNow, when I say that I am in the habit of going to sea whenever I begin to grow hazy about the eyes, and begin to be over conscious of my lungs, I do not mean to have it inferred that I ever go to sea as a passenger. For to go as a passenger you must needs have a purse, and a purse is but a rag unless you have something in it. Besides, passengers get sea-sick--grow quarrelsome--don't sleep of nights--do not enjoy themselves much, as a general thing;--no, I never go as a passenger; nor, though I am something of a salt, do I ever go to sea as a Commodore, or a Captain, or a Cook. I abandon the glory and distinction of such offices to those who like them. 
For my part, I abominate all honorable respectable toils, trials, and tribulations of every kind whatsoever. It is quite as much as I can do to take care of myself, without taking care of ships, barques, brigs, schooners, and what not. And as for going as cook,--though I confess there is considerable glory in that, a cook being a sort of officer on ship-board--yet, somehow, I never fancied broiling fowls;--though once broiled, judiciously buttered, and judgmatically salted and peppered, there is no one who will speak more respectfully, not to say reverentially, of a broiled fowl than I will. It is out of the idolatrous dotings of the old Egyptians upon broiled ibis and roasted river horse, that you see the mummies of those creatures in their huge bake-houses the pyramids.\n\nNo, when I go to sea, I go as a simple sailor, right before the mast, plumb down into the forecastle, aloft there to the royal mast-head. True, they rather order me about some, and make me jump from spar to spar, like a grasshopper in a May meadow. And at first, this sort of thing is unpleasant enough. It touches one's sense of honor, particularly if you come of an old established family in the land, the Van Rensselaers, or Randolphs, or Hardicanutes. And more than all, if just previous to putting your hand into the tar-pot, you have been lording it as a country schoolmaster, making the tallest boys stand in awe of you. The transition is a keen one, I assure you, from a schoolmaster to a sailor, and requires a strong decoction of Seneca and the Stoics to enable you to grin and bear it. But even this wears off in time.\n\nWhat of it, if some old hunks of a sea-captain orders me to get a broom and sweep down the decks? What does that indignity amount to, weighed, I mean, in the scales of the New Testament? Do you think the archangel Gabriel thinks anything the less of me, because I promptly and respectfully obey that old hunks in that particular instance? Who ain't a slave? Tell me that. Well, then, however the old sea-captains may order me about--however they may thump and punch me about, I have the satisfaction of knowing that it is all right; that everybody else is one way or other served in much the same way--either in a physical or metaphysical point of view, that is; and so the universal thump is passed round, and all hands should rub each other's shoulder-blades, and be content.\n\nAgain, I always go to sea as a sailor, because they make a point of paying me for my trouble, whereas they never pay passengers a single penny that I ever heard of. On the contrary, passengers themselves must pay. And there is all the difference in the world between paying and being paid. The act of paying is perhaps the most uncomfortable infliction that the two orchard thieves entailed upon us. But being paid,--what will compare with it? The urbane activity with which a man receives money is really marvellous, considering that we so earnestly believe money to be the root of all earthly ills, and that on no account can a monied man enter heaven. Ah! how cheerfully we consign ourselves to perdition!\n\nFinally, I always go to sea as a sailor, because of the wholesome exercise and pure air of the forecastle deck. For as in this world, head winds are far more prevalent than winds from astern (that is, if you never violate the Pythagorean maxim), so for the most part the Commodore on the quarter-deck gets his atmosphere at second hand from the sailors on the forecastle. He thinks he breathes it first; but not so. 
In much the same way do the commonalty lead their leaders in many other things, at the same time that the leaders little suspect it. But wherefore it was that after having repeatedly smelt the sea as a merchant sailor, I should now take it into my head to go on a whaling voyage; this the invisible police officer of the Fates, who has the constant surveillance of me, and secretly dogs me, and influences me in some unaccountable way--he can better answer than any one else. And, doubtless, my going on this whaling voyage, formed part of the grand programme of Providence that was drawn up a long time ago. It came in as a sort of brief interlude and solo between more extensive performances. I take it that this part of the bill must have run something like this:\n\n\n\n\"grand contested election for the presidency of the united states.\n\n\"whaling voyage by one ishmael.\n\n\"bloody battle in afghanistan.\"\n\n\n\nThough I cannot tell why it was exactly that those stage managers, the Fates, put me down for this shabby part of a whaling voyage, when others were set down for magnificent parts in high tragedies, and short and easy parts in genteel comedies, and jolly parts in farces--though I cannot tell why this was exactly; yet, now that I recall all the circumstances, I think I can see a little into the springs and motives which being cunningly presented to me under various disguises, induced me to set about performing the part I did, besides cajoling me into the delusion that it was a choice resulting from my own unbiased freewill and discriminating judgment.\n\nChief among these motives was the overwhelming idea of the great whale himself. Such a portentous and mysterious monster roused all my curiosity. Then the wild and distant seas where he rolled his island bulk; the undeliverable, nameless perils of the whale; these, with all the attending marvels of a thousand Patagonian sights and sounds, helped to sway me to my wish. With other men, perhaps, such things would not have been inducements; but as for me, I am tormented with an everlasting itch for things remote. I love to sail forbidden seas, and land on barbarous coasts. 
Not ignoring what is good, I am quick to perceive a horror, and could still be social with it--would they let me--since it is but well to be on friendly terms with all the inmates of the place one lodges in.\n\nBy reason of these things, then, the whaling voyage was welcome; the great flood-gates of the wonder-world swung open, and in the wild conceits that swayed me to my purpose, two and two there floated into my inmost soul, endless processions of the whale, and, mid most of them all, one grand hooded phantom, like a snow hill in the air.\n\nCopyright © 1967 by Bantam Books\n\nPublisher\nModern Library\n\nCategories\nClassic Fiction\nLiterary Fiction\nFiction\nClassics\n\n\nAbout Moby-Dick\n\nAbout Herman Melville"
+}
\ No newline at end of file
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
index 1feb95661f33a..381a9bc1e8a55 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
@@ -49,7 +49,6 @@
 import org.elasticsearch.features.NodeFeature;
 import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.TokenizerFactory;
-import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.indices.AssociatedIndexDescriptor;
 import org.elasticsearch.indices.SystemIndexDescriptor;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
@@ -378,8 +377,6 @@
 import org.elasticsearch.xpack.ml.process.MlMemoryTracker;
 import org.elasticsearch.xpack.ml.process.NativeController;
 import org.elasticsearch.xpack.ml.process.NativeStorageProvider;
-import org.elasticsearch.xpack.ml.queries.SparseVectorQueryBuilder;
-import org.elasticsearch.xpack.ml.queries.TextExpansionQueryBuilder;
 import org.elasticsearch.xpack.ml.rest.RestDeleteExpiredDataAction;
 import org.elasticsearch.xpack.ml.rest.RestMlInfoAction;
 import org.elasticsearch.xpack.ml.rest.RestMlMemoryAction;
@@ -1775,22 +1772,6 @@ public List<QueryVectorBuilderSpec<?>> getQueryVectorBuilders() {
         );
     }
 
-    @Override
-    public List<QuerySpec<?>> getQueries() {
-        return List.of(
-            new QuerySpec<QueryBuilder>(
-                TextExpansionQueryBuilder.NAME,
-                TextExpansionQueryBuilder::new,
-                TextExpansionQueryBuilder::fromXContent
-            ),
-            new QuerySpec<QueryBuilder>(
-                SparseVectorQueryBuilder.NAME,
-                SparseVectorQueryBuilder::new,
-                SparseVectorQueryBuilder::fromXContent
-            )
-        );
-    }
-
     private <T> ContextParser<String, T> checkAggLicense(ContextParser<String, T> realParser, LicensedFeature.Momentary feature) {
         return (parser, name) -> {
             if (feature.check(getLicenseState()) == false) {
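Note on the hunk above: it removes the text_expansion and sparse_vector registrations from the ML plugin without deleting the query builders, so some other plugin must re-register them through the same SearchPlugin#getQueries extension point or both queries would stop resolving. A minimal sketch of what that counterpart registration would look like; the class name and the inference-side package are assumptions (the actual InferencePlugin hunk is not shown in this excerpt), and only the QuerySpec arguments are taken from the deleted code:

package org.elasticsearch.xpack.inference; // assumed package

import java.util.List;

import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.plugins.SearchPlugin;
// Assumed new home of the builders formerly under org.elasticsearch.xpack.ml.queries:
import org.elasticsearch.xpack.inference.queries.SparseVectorQueryBuilder;
import org.elasticsearch.xpack.inference.queries.TextExpansionQueryBuilder;

public class InferencePluginSketch extends Plugin implements SearchPlugin {
    @Override
    public List<QuerySpec<?>> getQueries() {
        // Same name / stream-reader / xcontent-parser triples that the
        // removed MachineLearning#getQueries registered.
        return List.of(
            new QuerySpec<QueryBuilder>(
                TextExpansionQueryBuilder.NAME,
                TextExpansionQueryBuilder::new,
                TextExpansionQueryBuilder::fromXContent
            ),
            new QuerySpec<QueryBuilder>(
                SparseVectorQueryBuilder.NAME,
                SparseVectorQueryBuilder::new,
                SparseVectorQueryBuilder::fromXContent
            )
        );
    }
}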
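On the moby-dick.json fixture above: each entry pairs a dense embedding vector with a character range ("offset" with "field", "start", "end") into the original "field" string, and consecutive ranges overlap (e.g. 7274-8428 followed by 8427-9687), consistent with chunking that carries sentence overlap. A tiny illustration of how such an offset maps back to its chunk text; the record type and class here are illustrative, not classes from this patch:

// Sketch only: resolve a stored fixture offset back to the chunk text
// its embedding was computed from.
public class OffsetSketch {
    record Offset(String field, int start, int end) {}

    // start/end are character offsets into the original field value.
    static String chunkText(String fieldValue, Offset o) {
        return fieldValue.substring(o.start(), o.end());
    }

    public static void main(String[] args) {
        String field = "Call me Ishmael. Some years ago..."; // truncated sample
        Offset first = new Offset("field", 0, 16);
        System.out.println(chunkText(field, first)); // prints "Call me Ishmael."
    }
}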