diff --git a/build-tools-internal/src/main/resources/changelog-schema.json b/build-tools-internal/src/main/resources/changelog-schema.json index 9692af7adc5e6..7d35951eaa2cf 100644 --- a/build-tools-internal/src/main/resources/changelog-schema.json +++ b/build-tools-internal/src/main/resources/changelog-schema.json @@ -279,7 +279,6 @@ "compatibilityChangeArea": { "type": "string", "enum": [ - "Aggregations", "Analysis", "Authorization", "Cluster and node setting", diff --git a/docs/changelog/118484.yaml b/docs/changelog/118484.yaml deleted file mode 100644 index 41db476a42523..0000000000000 --- a/docs/changelog/118484.yaml +++ /dev/null @@ -1,14 +0,0 @@ -pr: 118484 -summary: Remove date histogram boolean support -area: Aggregations -type: breaking -issues: [] -breaking: - title: Remove date histogram boolean support - area: Aggregations - details: Elasticsearch no longer allows running Date Histogram aggregations - over boolean fields. Instead, use Terms aggregation for boolean - fields. - impact: We expect the impact to be minimal, as this never produced good - results, and has been deprecated for years. - notable: false diff --git a/docs/reference/ml/trained-models/apis/start-trained-model-deployment.asciidoc b/docs/reference/ml/trained-models/apis/start-trained-model-deployment.asciidoc index bf9c4d14db290..6f7e2a4d9f988 100644 --- a/docs/reference/ml/trained-models/apis/start-trained-model-deployment.asciidoc +++ b/docs/reference/ml/trained-models/apis/start-trained-model-deployment.asciidoc @@ -138,8 +138,8 @@ normal priority deployments. Controls how many inference requests are allowed in the queue at a time. Every machine learning node in the cluster where the model can be allocated has a queue of this size; when the number of requests exceeds the total value, -new requests are rejected with a 429 error. Defaults to 10000. Max allowed value -is 100000. +new requests are rejected with a 429 error. Defaults to 1024. Max allowed value +is 1000000. `threads_per_allocation`:: (Optional, integer) @@ -173,7 +173,7 @@ The API returns the following results: "model_bytes": 265632637, "threads_per_allocation" : 1, "number_of_allocations" : 1, - "queue_capacity" : 10000, + "queue_capacity" : 1024, "priority": "normal" }, "routing_table": { @@ -229,4 +229,4 @@ POST _ml/trained_models/my_model/deployment/_start?deployment_id=my_model_for_se } } -------------------------------------------------- -// TEST[skip:TBD] +// TEST[skip:TBD] \ No newline at end of file diff --git a/muted-tests.yml b/muted-tests.yml index b5712b22fe583..93d1a6e6374b7 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -302,9 +302,6 @@ tests: - class: org.elasticsearch.index.engine.RecoverySourcePruneMergePolicyTests method: testPruneSome issue: https://github.com/elastic/elasticsearch/issues/118728 -- class: org.elasticsearch.smoketest.DocsClientYamlTestSuiteIT - method: test {yaml=reference/indices/shard-stores/line_150} - issue: https://github.com/elastic/elasticsearch/issues/118896 # Examples: # diff --git a/server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java b/server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java index 17a800dddff1a..74143cc5c059b 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java @@ -48,11 +48,9 @@ import org.elasticsearch.index.engine.VersionConflictEngineException; import org.elasticsearch.index.get.GetResult; import org.elasticsearch.index.mapper.DocumentMapper; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.MapperException; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MappingLookup; -import org.elasticsearch.index.mapper.RoutingFieldMapper; import org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.index.seqno.SequenceNumbers; import org.elasticsearch.index.shard.IndexShard; @@ -328,8 +326,7 @@ static boolean executeBulkItemRequest( if (opType == DocWriteRequest.OpType.UPDATE) { final UpdateRequest updateRequest = (UpdateRequest) context.getCurrent(); try { - var gFields = getStoredFieldsSpec(context.getPrimary()); - updateResult = updateHelper.prepare(updateRequest, context.getPrimary(), nowInMillisSupplier, gFields); + updateResult = updateHelper.prepare(updateRequest, context.getPrimary(), nowInMillisSupplier); } catch (Exception failure) { // we may fail translating a update to index or delete operation // we use index result to communicate failure while translating update request @@ -404,16 +401,6 @@ static boolean executeBulkItemRequest( return true; } - private static String[] getStoredFieldsSpec(IndexShard indexShard) { - if (InferenceMetadataFieldsMapper.isEnabled(indexShard.indexSettings().getIndexVersionCreated())) { - if (indexShard.mapperService().mappingLookup().inferenceFields().size() > 0) { - // Retrieves the inference metadata field containing the inference results for all semantic fields defined in the mapping. - return new String[] { RoutingFieldMapper.NAME, InferenceMetadataFieldsMapper.NAME }; - } - } - return new String[] { RoutingFieldMapper.NAME }; - } - private static boolean handleMappingUpdateRequired( BulkPrimaryExecutionContext context, MappingUpdatePerformer mappingUpdater, diff --git a/server/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java b/server/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java index eedf4ded5f7e4..0749512635f83 100644 --- a/server/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java +++ b/server/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java @@ -44,7 +44,6 @@ import org.elasticsearch.index.IndexService; import org.elasticsearch.index.engine.VersionConflictEngineException; import org.elasticsearch.index.mapper.InferenceFieldMapper; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.shard.IndexShard; @@ -375,8 +374,7 @@ private static UpdateHelper.Result deleteInferenceResults( IndexMetadata indexMetadata, MappingLookup mappingLookup ) { - if (result.getResponseResult() != DocWriteResponse.Result.UPDATED - || InferenceMetadataFieldsMapper.isEnabled(indexMetadata.getCreationVersion())) { + if (result.getResponseResult() != DocWriteResponse.Result.UPDATED) { return result; } @@ -405,7 +403,7 @@ private static UpdateHelper.Result deleteInferenceResults( String inferenceFieldName = entry.getKey(); Mapper mapper = mappingLookup.getMapper(inferenceFieldName); - if (mapper instanceof InferenceFieldMapper) { + if (mapper instanceof InferenceFieldMapper inferenceFieldMapper) { String[] sourceFields = entry.getValue().getSourceFields(); for (String sourceField : sourceFields) { if (sourceField.equals(inferenceFieldName) == false @@ -414,7 +412,7 @@ private static UpdateHelper.Result deleteInferenceResults( // This has two important side effects: // - The inference field value will remain parsable by its mapper // - The inference results will be removed, forcing them to be re-generated downstream - updatedSource.put(inferenceFieldName, getOriginalValueLegacy(inferenceFieldName, updatedSource)); + updatedSource.put(inferenceFieldName, inferenceFieldMapper.getOriginalValue(updatedSource)); updatedSourceModified = true; break; } @@ -437,24 +435,4 @@ private static UpdateHelper.Result deleteInferenceResults( return returnedResult; } - - /** - * Get the field's original value (i.e. the value the user specified) from the provided source. - * - * @param sourceAsMap The source as a map - * @return The field's original value, or {@code null} if none was provided - */ - private static Object getOriginalValueLegacy(String fullPath, Map sourceAsMap) { - // TODO: Fix bug here when semantic text field is in an object - Object fieldValue = sourceAsMap.get(fullPath); - if (fieldValue == null) { - return null; - } else if (fieldValue instanceof Map == false) { - // Don't try to further validate the non-map value, that will be handled when the source is fully parsed - return fieldValue; - } - - Map fieldValueMap = XContentMapValues.nodeMapValue(fieldValue, "Field [" + fullPath + "]"); - return XContentMapValues.extractValue("text", fieldValueMap); - } } diff --git a/server/src/main/java/org/elasticsearch/action/update/UpdateHelper.java b/server/src/main/java/org/elasticsearch/action/update/UpdateHelper.java index 39600c7eca661..a645c156b63c7 100644 --- a/server/src/main/java/org/elasticsearch/action/update/UpdateHelper.java +++ b/server/src/main/java/org/elasticsearch/action/update/UpdateHelper.java @@ -60,15 +60,7 @@ public UpdateHelper(ScriptService scriptService) { * Prepares an update request by converting it into an index or delete request or an update response (no action). */ public Result prepare(UpdateRequest request, IndexShard indexShard, LongSupplier nowInMillis) throws IOException { - // TODO: Don't hard-code gFields - return prepare(request, indexShard, nowInMillis, new String[] { RoutingFieldMapper.NAME }); - } - - /** - * Prepares an update request by converting it into an index or delete request or an update response (no action). - */ - public Result prepare(UpdateRequest request, IndexShard indexShard, LongSupplier nowInMillis, String[] gFields) throws IOException { - final GetResult getResult = indexShard.getService().getForUpdate(request.id(), request.ifSeqNo(), request.ifPrimaryTerm(), gFields); + final GetResult getResult = indexShard.getService().getForUpdate(request.id(), request.ifSeqNo(), request.ifPrimaryTerm()); return prepare(indexShard, request, getResult, nowInMillis); } diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index b33e755aea1b8..fd321f6256194 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -139,8 +139,6 @@ private static Version parseUnchecked(String version) { public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_00_0, Version.LUCENE_10_0_0); public static final IndexVersion DEPRECATE_SOURCE_MODE_MAPPER = def(9_003_00_0, Version.LUCENE_10_0_0); public static final IndexVersion USE_SYNTHETIC_SOURCE_FOR_RECOVERY = def(9_004_00_0, Version.LUCENE_10_0_0); - public static final IndexVersion INFERENCE_METADATA_FIELDS = def(9_005_00_0, Version.LUCENE_10_0_0); - /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java b/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java index 73f49021805bc..0f772b49bf92b 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java @@ -440,7 +440,7 @@ private void readStoredFieldsDirectly(StoredFieldVisitor visitor) throws IOExcep SourceFieldMapper mapper = mappingLookup.getMapping().getMetadataMapperByClass(SourceFieldMapper.class); if (mapper != null) { try { - sourceBytes = mapper.applyFilters(null, sourceBytes, null); + sourceBytes = mapper.applyFilters(sourceBytes, null); } catch (IOException e) { throw new IOException("Failed to reapply filters after reading from translog", e); } diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index 7ada770e2c4fd..43b5d2c7d3f78 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -9,9 +9,7 @@ package org.elasticsearch.index.get; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.search.IndexSearcher; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.document.DocumentField; @@ -28,7 +26,6 @@ import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.IgnoredFieldMapper; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperMetrics; @@ -42,7 +39,6 @@ import org.elasticsearch.index.shard.MultiEngineGet; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.search.lookup.Source; -import org.elasticsearch.search.lookup.SourceFilter; import java.io.IOException; import java.util.ArrayList; @@ -194,13 +190,9 @@ public GetResult getFromTranslog( } public GetResult getForUpdate(String id, long ifSeqNo, long ifPrimaryTerm) throws IOException { - return getForUpdate(id, ifSeqNo, ifPrimaryTerm, new String[] { RoutingFieldMapper.NAME }); - } - - public GetResult getForUpdate(String id, long ifSeqNo, long ifPrimaryTerm, String[] gFields) throws IOException { return get( id, - gFields, + new String[] { RoutingFieldMapper.NAME }, true, Versions.MATCH_ANY, VersionType.INTERNAL, @@ -296,17 +288,11 @@ private GetResult innerGetFetch( boolean forceSyntheticSource ) throws IOException { assert get.exists() : "method should only be called if document could be retrieved"; + // check first if stored fields to be loaded don't contain an object field MappingLookup mappingLookup = mapperService.mappingLookup(); - final IndexVersion indexVersion = indexSettings.getIndexVersionCreated(); - final Set storedFieldSet = new HashSet<>(); - boolean hasInferenceMetadataFields = false; if (storedFields != null) { for (String field : storedFields) { - if (field.equals(InferenceMetadataFieldsMapper.NAME) && InferenceMetadataFieldsMapper.isEnabled(indexVersion)) { - hasInferenceMetadataFields = true; - continue; - } Mapper fieldMapper = mappingLookup.getMapper(field); if (fieldMapper == null) { if (mappingLookup.objectMappers().get(field) != null) { @@ -314,7 +300,6 @@ private GetResult innerGetFetch( throw new IllegalArgumentException("field [" + field + "] isn't a leaf field"); } } - storedFieldSet.add(field); } } @@ -328,8 +313,8 @@ private GetResult innerGetFetch( () -> mappingLookup.getMapping().syntheticFieldLoader(sourceFilter), mapperMetrics.sourceFieldMetrics() ) - : mappingLookup.newSourceLoader(sourceFilter, mapperMetrics.sourceFieldMetrics()); - StoredFieldLoader storedFieldLoader = buildStoredFieldLoader(storedFieldSet, fetchSourceContext, loader); + : mappingLookup.newSourceLoader(fetchSourceContext.filter(), mapperMetrics.sourceFieldMetrics()); + StoredFieldLoader storedFieldLoader = buildStoredFieldLoader(storedFields, fetchSourceContext, loader); LeafStoredFieldLoader leafStoredFieldLoader = storedFieldLoader.getLoader(docIdAndVersion.reader.getContext(), null); try { leafStoredFieldLoader.advanceTo(docIdAndVersion.docId); @@ -338,6 +323,7 @@ private GetResult innerGetFetch( } // put stored fields into result objects + final IndexVersion indexVersion = indexSettings.getIndexVersionCreated(); if (leafStoredFieldLoader.storedFields().isEmpty() == false) { Set needed = new HashSet<>(); if (storedFields != null) { @@ -386,19 +372,6 @@ private GetResult innerGetFetch( if (mapperService.mappingLookup().isSourceEnabled() && fetchSourceContext.fetchSource()) { Source source = loader.leaf(docIdAndVersion.reader, new int[] { docIdAndVersion.docId }) .source(leafStoredFieldLoader, docIdAndVersion.docId); - - SourceFilter filter = fetchSourceContext.filter(); - if (filter != null) { - source = source.filter(filter); - } - - if (hasInferenceMetadataFields) { - /** - * Adds the {@link InferenceMetadataFieldsMapper#NAME} field from the document fields - * to the original _source if it has been requested. - */ - source = addInferenceMetadataFields(mapperService, docIdAndVersion.reader.getContext(), docIdAndVersion.docId, source); - } sourceBytes = source.internalSourceRef(); } @@ -431,38 +404,18 @@ private static DocumentField loadIgnoredMetadataField(final DocIdAndVersion docI return new DocumentField(IgnoredFieldMapper.NAME, ignoredValues); } - private static Source addInferenceMetadataFields(MapperService mapperService, LeafReaderContext readerContext, int docId, Source source) - throws IOException { - var mappingLookup = mapperService.mappingLookup(); - var inferenceMetadata = (InferenceMetadataFieldsMapper) mappingLookup.getMapping() - .getMetadataMapperByName(InferenceMetadataFieldsMapper.NAME); - if (inferenceMetadata == null || mapperService.mappingLookup().inferenceFields().isEmpty()) { - return source; + private static StoredFieldLoader buildStoredFieldLoader(String[] fields, FetchSourceContext fetchSourceContext, SourceLoader loader) { + Set fieldsToLoad = new HashSet<>(); + if (fields != null && fields.length > 0) { + Collections.addAll(fieldsToLoad, fields); } - var inferenceLoader = inferenceMetadata.fieldType() - .valueFetcher(mappingLookup, mapperService.getBitSetProducer(), new IndexSearcher(readerContext.reader())); - inferenceLoader.setNextReader(readerContext); - List values = inferenceLoader.fetchValues(source, docId, List.of()); - if (values.size() == 1) { - var newSource = source.source(); - newSource.put(InferenceMetadataFieldsMapper.NAME, values.get(0)); - return Source.fromMap(newSource, source.sourceContentType()); - } - return source; - } - - private static StoredFieldLoader buildStoredFieldLoader( - Set fields, - FetchSourceContext fetchSourceContext, - SourceLoader loader - ) { if (fetchSourceContext.fetchSource()) { - fields.addAll(loader.requiredStoredFields()); + fieldsToLoad.addAll(loader.requiredStoredFields()); } else { - if (fields.isEmpty()) { + if (fieldsToLoad.isEmpty()) { return StoredFieldLoader.empty(); } } - return StoredFieldLoader.create(fetchSourceContext.fetchSource(), fields); + return StoredFieldLoader.create(fetchSourceContext.fetchSource(), fieldsToLoad); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/InferenceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/InferenceFieldMapper.java index f7c6eef7dfd49..249ef5004e59c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/InferenceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/InferenceFieldMapper.java @@ -12,6 +12,7 @@ import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.inference.InferenceService; +import java.util.Map; import java.util.Set; /** @@ -25,4 +26,12 @@ public interface InferenceFieldMapper { * @param sourcePaths The source path that populates the input for the field (before inference) */ InferenceFieldMetadata getMetadata(Set sourcePaths); + + /** + * Get the field's original value (i.e. the value the user specified) from the provided source. + * + * @param sourceAsMap The source as a map + * @return The field's original value, or {@code null} if none was provided + */ + Object getOriginalValue(Map sourceAsMap); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/InferenceMetadataFieldsMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/InferenceMetadataFieldsMapper.java deleted file mode 100644 index c3f88941f5de8..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/mapper/InferenceMetadataFieldsMapper.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.index.mapper; - -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.join.BitSetProducer; -import org.elasticsearch.common.util.FeatureFlag; -import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; -import org.elasticsearch.index.query.SearchExecutionContext; - -import java.util.Map; -import java.util.function.Function; - -/** - * An abstract {@link MetadataFieldMapper} used as a placeholder for implementation - * in the inference module. It is required by {@link SourceFieldMapper} to identify - * the field name for removal from _source. - */ -public abstract class InferenceMetadataFieldsMapper extends MetadataFieldMapper { - public static final FeatureFlag INFERENCE_METADATA_FIELDS_FEATURE_FLAG = new FeatureFlag("inference_metadata_fields"); - - public static final String NAME = "_inference_fields"; - public static final String CONTENT_TYPE = "_inference_fields"; - - protected InferenceMetadataFieldsMapper(MappedFieldType inferenceFieldType) { - super(inferenceFieldType); - } - - @Override - protected String contentType() { - return CONTENT_TYPE; - } - - @Override - public InferenceMetadataFieldType fieldType() { - return (InferenceMetadataFieldType) super.fieldType(); - } - - public abstract static class InferenceMetadataFieldType extends MappedFieldType { - public InferenceMetadataFieldType() { - super(NAME, false, false, false, TextSearchInfo.NONE, Map.of()); - } - - /** - * Returns a {@link ValueFetcher} without requiring the construction of a full {@link SearchExecutionContext}. - */ - public abstract ValueFetcher valueFetcher( - MappingLookup mappingLookup, - Function bitSetCache, - IndexSearcher searcher - ); - } - - public static boolean isEnabled(IndexVersion indexVersion) { - return indexVersion.onOrAfter(IndexVersions.INFERENCE_METADATA_FIELDS) && INFERENCE_METADATA_FIELDS_FEATURE_FLAG.isEnabled(); - } -} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java index fb4f86c3cba98..1673b1719d8bf 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -171,7 +171,6 @@ public boolean isAutoUpdate() { private final IndexVersion indexVersionCreated; private final MapperRegistry mapperRegistry; private final Supplier mappingParserContextSupplier; - private final Function bitSetProducer; private final MapperMetrics mapperMetrics; private volatile DocumentMapper mapper; @@ -246,7 +245,6 @@ public MapperService( this::getMetadataMappers, this::resolveDocumentType ); - this.bitSetProducer = bitSetProducer; this.mapperMetrics = mapperMetrics; } @@ -828,10 +826,6 @@ public MapperRegistry getMapperRegistry() { return mapperRegistry; } - public Function getBitSetProducer() { - return bitSetProducer; - } - public MapperMetrics getMapperMetrics() { return mapperMetrics; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java b/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java index 907f7265e98af..1278ebf0a393a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java @@ -106,7 +106,7 @@ public T getMetadataMapperByClass(Class clazz return (T) metadataMappersMap.get(clazz); } - public MetadataFieldMapper getMetadataMapperByName(String mapperName) { + MetadataFieldMapper getMetadataMapperByName(String mapperName) { return metadataMappersByName.get(mapperName); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java index 6a107dbaa9e63..46b70193ba0e8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java @@ -855,7 +855,7 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep } - public ObjectMapper findParentMapper(String leafFieldPath) { + ObjectMapper findParentMapper(String leafFieldPath) { var pathComponents = leafFieldPath.split("\\."); int startPathComponent = 0; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index 7b76187e8102e..85f4217811a84 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java @@ -401,10 +401,10 @@ public boolean isComplete() { } @Override - public void postParse(DocumentParserContext context) throws IOException { + public void preParse(DocumentParserContext context) throws IOException { BytesReference originalSource = context.sourceToParse().source(); XContentType contentType = context.sourceToParse().getXContentType(); - final BytesReference adaptedSource = applyFilters(context, originalSource, contentType); + final BytesReference adaptedSource = applyFilters(originalSource, contentType); if (adaptedSource != null) { final BytesRef ref = adaptedSource.toBytesRef(); @@ -432,28 +432,13 @@ public void postParse(DocumentParserContext context) throws IOException { } @Nullable - public BytesReference applyFilters( - @Nullable DocumentParserContext context, - @Nullable BytesReference originalSource, - @Nullable XContentType contentType - ) throws IOException { - if (stored() == false || originalSource == null) { + public BytesReference applyFilters(@Nullable BytesReference originalSource, @Nullable XContentType contentType) throws IOException { + if (stored() == false) { return null; } - var modSourceFilter = sourceFilter; - if (context != null - && InferenceMetadataFieldsMapper.isEnabled(context.indexSettings().getIndexVersionCreated()) - && context.mappingLookup().inferenceFields().isEmpty() == false) { - String[] modExcludes = new String[excludes != null ? excludes.length + 1 : 1]; - if (excludes != null) { - System.arraycopy(excludes, 0, modExcludes, 0, excludes.length); - } - modExcludes[modExcludes.length - 1] = InferenceMetadataFieldsMapper.NAME; - modSourceFilter = new SourceFilter(includes, modExcludes); - } - if (modSourceFilter != null) { + if (originalSource != null && sourceFilter != null) { // Percolate and tv APIs may not set the source and that is ok, because these APIs will not index any data - return Source.fromBytes(originalSource, contentType).filter(modSourceFilter).internalSourceRef(); + return Source.fromBytes(originalSource, contentType).filter(sourceFilter).internalSourceRef(); } else { return originalSource; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index c73f4fef23361..b4de73e3b62ce 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -78,11 +78,6 @@ public Builder(String name) { super(name); } - public Builder setStored(boolean value) { - stored.setValue(value); - return this; - } - @Override protected Parameter[] getParameters() { return new Parameter[] { stored, meta }; diff --git a/server/src/main/java/org/elasticsearch/search/SearchHit.java b/server/src/main/java/org/elasticsearch/search/SearchHit.java index 4f070b0d455ca..98f7c92d9997a 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchHit.java +++ b/server/src/main/java/org/elasticsearch/search/SearchHit.java @@ -517,10 +517,6 @@ public void addDocumentFields(Map docFields, Map plugins) { registerFetchSubPhase(new StoredFieldsPhase()); registerFetchSubPhase(new FetchDocValuesPhase()); registerFetchSubPhase(new ScriptFieldsPhase()); - registerFetchSubPhase(new FetchFieldsPhase()); - // register after fetch fields to handle metadata fields that needs to be copied in _source (e.g. _inference_fields). registerFetchSubPhase(new FetchSourcePhase()); + registerFetchSubPhase(new FetchFieldsPhase()); registerFetchSubPhase(new FetchVersionPhase()); registerFetchSubPhase(new SeqNoPrimaryTermPhase()); registerFetchSubPhase(new MatchedQueriesPhase()); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java index 4d0f58756b11c..a8ccd1c76d031 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java @@ -11,6 +11,7 @@ import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.common.Rounding; +import org.elasticsearch.common.logging.DeprecationCategory; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.aggregations.Aggregator; @@ -41,6 +42,49 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) { ); builder.register(DateHistogramAggregationBuilder.REGISTRY_KEY, CoreValuesSourceType.RANGE, DateRangeHistogramAggregator::new, true); + + builder.register( + DateHistogramAggregationBuilder.REGISTRY_KEY, + CoreValuesSourceType.BOOLEAN, + ( + name, + factories, + rounding, + order, + keyed, + minDocCount, + downsampledResultsOffset, + extendedBounds, + hardBounds, + valuesSourceConfig, + context, + parent, + cardinality, + metadata) -> { + DEPRECATION_LOGGER.warn( + DeprecationCategory.AGGREGATIONS, + "date-histogram-boolean", + "Running DateHistogram aggregations on [boolean] fields is deprecated" + ); + return DateHistogramAggregator.build( + name, + factories, + rounding, + order, + keyed, + minDocCount, + downsampledResultsOffset, + extendedBounds, + hardBounds, + valuesSourceConfig, + context, + parent, + cardinality, + metadata + ); + }, + true + ); } private final DateHistogramAggregationSupplier aggregatorSupplier; diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java index 2523c62015215..0bbbff3a5d5f4 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java @@ -10,7 +10,6 @@ package org.elasticsearch.search.fetch; import org.apache.lucene.search.Query; -import org.elasticsearch.index.cache.bitset.BitsetFilterCache; import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.index.query.ParsedQuery; @@ -88,10 +87,6 @@ private static StoredFieldsContext buildStoredFieldsContext(SearchContext in) { return sfc; } - public BitsetFilterCache bitsetFilterCache() { - return searchContext.bitsetFilterCache(); - } - /** * The name of the index that documents are being fetched from */ diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java index 4f2f79599dd8b..79e51036a91be 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java @@ -10,7 +10,6 @@ package org.elasticsearch.search.fetch.subphase; import org.apache.lucene.index.LeafReaderContext; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.fetch.FetchContext; import org.elasticsearch.search.fetch.FetchSubPhase; @@ -64,7 +63,6 @@ private void hitExecute(HitContext hitContext) { // If this is a parent document and there are no source filters, then add the source as-is. if (nestedHit == false && sourceFilter == null) { - source = replaceInferenceMetadataFields(hitContext.hit(), source); hitContext.hit().sourceRef(source.internalSourceRef()); fastPath++; return; @@ -79,32 +77,10 @@ private void hitExecute(HitContext hitContext) { } if (nestedHit) { source = extractNested(source, hitContext.hit().getNestedIdentity()); - } else { - source = replaceInferenceMetadataFields(hitContext.hit(), source); } hitContext.hit().sourceRef(source.internalSourceRef()); } - /** - * Transfers the {@link InferenceMetadataFieldsMapper#NAME} field from the document fields - * to the original _source if it has been requested. - */ - private Source replaceInferenceMetadataFields(SearchHit hit, Source source) { - if (InferenceMetadataFieldsMapper.isEnabled( - fetchContext.getSearchExecutionContext().getIndexSettings().getIndexVersionCreated() - ) == false) { - return source; - } - - var field = hit.removeMetadataFields(InferenceMetadataFieldsMapper.NAME); - if (field == null || field.getValues().isEmpty()) { - return source; - } - var newSource = source.source(); - newSource.put(InferenceMetadataFieldsMapper.NAME, field.getValues().get(0)); - return Source.fromMap(newSource, source.sourceContentType()); - } - @Override public Map getDebugInfo() { return Map.of("fast_path", fastPath); diff --git a/server/src/test/java/org/elasticsearch/action/bulk/TransportShardBulkActionTests.java b/server/src/test/java/org/elasticsearch/action/bulk/TransportShardBulkActionTests.java index f25f477655482..b389e33993b9b 100644 --- a/server/src/test/java/org/elasticsearch/action/bulk/TransportShardBulkActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/bulk/TransportShardBulkActionTests.java @@ -92,16 +92,8 @@ public class TransportShardBulkActionTests extends IndexShardTestCase { private final ShardId shardId = new ShardId("index", "_na_", 0); private final Settings idxSettings = indexSettings(IndexVersion.current(), 1, 0).build(); - private IndexMetadata indexMetadata(String mapping) { - IndexMetadata.Builder builder = IndexMetadata.builder("index").settings(idxSettings).primaryTerm(0, 1); - if (mapping != null) { - builder.putMapping(mapping); - } - return builder.build(); - } - - private IndexMetadata indexMetadata() { - return indexMetadata(""" + private IndexMetadata indexMetadata() throws IOException { + return IndexMetadata.builder("index").putMapping(""" { "properties": { "foo": { @@ -114,7 +106,7 @@ private IndexMetadata indexMetadata() { } } } - }"""); + }""").settings(idxSettings).primaryTerm(0, 1).build(); } public void testExecuteBulkIndexRequest() throws Exception { @@ -510,7 +502,7 @@ public void testNoopUpdateRequest() throws Exception { IndexShard shard = mockShard(null, null); UpdateHelper updateHelper = mock(UpdateHelper.class); - when(updateHelper.prepare(any(), eq(shard), any(), any())).thenReturn( + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( new UpdateHelper.Result( noopUpdateResponse, DocWriteResponse.Result.NOOP, @@ -565,7 +557,7 @@ public void testUpdateRequestWithFailure() throws Exception { ); UpdateHelper updateHelper = mock(UpdateHelper.class); - when(updateHelper.prepare(any(), eq(shard), any(), any())).thenReturn( + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( new UpdateHelper.Result( updateResponse, randomBoolean() ? DocWriteResponse.Result.CREATED : DocWriteResponse.Result.UPDATED, @@ -626,7 +618,7 @@ public void testUpdateRequestWithConflictFailure() throws Exception { ); UpdateHelper updateHelper = mock(UpdateHelper.class); - when(updateHelper.prepare(any(), eq(shard), any(), any())).thenReturn( + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( new UpdateHelper.Result( updateResponse, randomBoolean() ? DocWriteResponse.Result.CREATED : DocWriteResponse.Result.UPDATED, @@ -689,7 +681,7 @@ public void testUpdateRequestWithSuccess() throws Exception { ); UpdateHelper updateHelper = mock(UpdateHelper.class); - when(updateHelper.prepare(any(), eq(shard), any(), any())).thenReturn( + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( new UpdateHelper.Result( updateResponse, created ? DocWriteResponse.Result.CREATED : DocWriteResponse.Result.UPDATED, @@ -746,7 +738,7 @@ public void testUpdateWithDelete() throws Exception { when(shard.applyDeleteOperationOnPrimary(anyLong(), any(), any(), anyLong(), anyLong())).thenReturn(deleteResult); UpdateHelper updateHelper = mock(UpdateHelper.class); - when(updateHelper.prepare(any(), eq(shard), any(), any())).thenReturn( + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( new UpdateHelper.Result( updateResponse, DocWriteResponse.Result.DELETED, @@ -792,7 +784,7 @@ public void testFailureDuringUpdateProcessing() throws Exception { UpdateHelper updateHelper = mock(UpdateHelper.class); final ElasticsearchException err = new ElasticsearchException("oops"); - when(updateHelper.prepare(any(), eq(shard), any(), any())).thenThrow(err); + when(updateHelper.prepare(any(), eq(shard), any())).thenThrow(err); BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest }; BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); @@ -924,7 +916,7 @@ public void testRetries() throws Exception { }); UpdateHelper updateHelper = mock(UpdateHelper.class); - when(updateHelper.prepare(any(), eq(shard), any(), any())).thenReturn( + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( new UpdateHelper.Result( updateResponse, randomBoolean() ? DocWriteResponse.Result.CREATED : DocWriteResponse.Result.UPDATED, @@ -1137,7 +1129,7 @@ public void testNoopMappingUpdateInfiniteLoopPrevention() throws Exception { ); UpdateHelper updateHelper = mock(UpdateHelper.class); - when(updateHelper.prepare(any(), eq(shard), any(), any())).thenReturn( + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( new UpdateHelper.Result( new IndexRequest("index").id("id").source(Requests.INDEX_CONTENT_TYPE, "field", "value"), randomBoolean() ? DocWriteResponse.Result.CREATED : DocWriteResponse.Result.UPDATED, @@ -1203,7 +1195,7 @@ public void testNoopMappingUpdateSuccessOnRetry() throws Exception { ); UpdateHelper updateHelper = mock(UpdateHelper.class); - when(updateHelper.prepare(any(), eq(shard), any(), any())).thenReturn( + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( new UpdateHelper.Result( new IndexRequest("index").id("id").source(Requests.INDEX_CONTENT_TYPE, "field", "value"), randomBoolean() ? DocWriteResponse.Result.CREATED : DocWriteResponse.Result.UPDATED, @@ -1243,9 +1235,6 @@ private IndexShard mockShard(IndexSettings indexSettings, MapperService mapperSe if (indexSettings != null) { when(shard.indexSettings()).thenReturn(indexSettings); - } else { - IndexSettings defaultIndexSettings = new IndexSettings(indexMetadata(null), Settings.EMPTY); - when(shard.indexSettings()).thenReturn(defaultIndexSettings); } if (mapperService != null) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MappingLookupInferenceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MappingLookupInferenceFieldMapperTests.java index 755b83e8eb7ad..b1470c1ee5b3b 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/MappingLookupInferenceFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/MappingLookupInferenceFieldMapperTests.java @@ -105,6 +105,11 @@ public InferenceFieldMetadata getMetadata(Set sourcePaths) { return new InferenceFieldMetadata(fullPath(), INFERENCE_ID, SEARCH_INFERENCE_ID, sourcePaths.toArray(new String[0])); } + @Override + public Object getOriginalValue(Map sourceAsMap) { + return null; + } + @Override protected void parseCreateField(DocumentParserContext context) {} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java index bf26326abafbf..38294fb030ed4 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java @@ -83,9 +83,9 @@ public class DateHistogramAggregatorTests extends DateHistogramAggregatorTestCas "2017-12-12T22:55:46" ); - public void testBooleanFieldUnsupported() throws IOException { + public void testBooleanFieldDeprecated() throws IOException { final String fieldName = "bogusBoolean"; - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> testCase(iw -> { + testCase(iw -> { Document d = new Document(); d.add(new SortedNumericDocValuesField(fieldName, 0)); iw.addDocument(d); @@ -95,8 +95,8 @@ public void testBooleanFieldUnsupported() throws IOException { new DateHistogramAggregationBuilder("name").calendarInterval(DateHistogramInterval.HOUR).field(fieldName), new BooleanFieldMapper.BooleanFieldType(fieldName) ) - )); - assertThat(e.getMessage(), equalTo("Field [bogusBoolean] of type [boolean] is not supported for aggregation [date_histogram]")); + ); + assertWarnings("Running DateHistogram aggregations on [boolean] fields is deprecated"); } public void testMatchNoDocs() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhaseTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhaseTests.java index 971f3bab7b6a3..deada75279e33 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhaseTests.java @@ -11,12 +11,8 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.memory.MemoryIndex; -import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.fetch.FetchContext; @@ -194,12 +190,6 @@ private HitContext hitExecuteMultiple( when(fetchContext.getIndexName()).thenReturn("index"); SearchExecutionContext sec = mock(SearchExecutionContext.class); when(sec.isSourceEnabled()).thenReturn(sourceBuilder != null); - IndexSettings indexSettings = new IndexSettings( - IndexMetadata.builder("index").settings(indexSettings(IndexVersion.current(), 1, 0)).build(), - Settings.EMPTY - ); - when(sec.indexVersionCreated()).thenReturn(indexSettings.getIndexVersionCreated()); - when(sec.getIndexSettings()).thenReturn(indexSettings); when(fetchContext.getSearchExecutionContext()).thenReturn(sec); final SearchHit searchHit = SearchHit.unpooled(1, null, nestedIdentity); diff --git a/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java index ec657119acb33..0d505dab40fed 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java @@ -69,6 +69,7 @@ import static org.hamcrest.Matchers.instanceOf; public abstract class AbstractQueryTestCase> extends AbstractBuilderTestCase { + private static final int NUMBER_OF_TESTQUERIES = 20; public final QB createTestQueryBuilder() { diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index 89298fde63be1..fb37fb3575551 100644 --- a/x-pack/plugin/build.gradle +++ b/x-pack/plugin/build.gradle @@ -94,6 +94,5 @@ tasks.named("yamlRestCompatTestTransform").configure({ task -> task.skipTest("privileges/11_builtin/Test get builtin privileges" ,"unnecessary to test compatibility") task.skipTest("esql/61_enrich_ip/Invalid IP strings", "We switched from exceptions to null+warnings for ENRICH runtime errors") task.skipTest("esql/180_match_operator/match with non text field", "Match operator can now be used on non-text fields") - task.skipTest("esql/40_unsupported_types/semantic_text declared in mapping", "The semantic text field format changed") }) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index 23f9e91dc32da..f5923a4942634 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -395,6 +395,8 @@ public List getNamedXContent() { ); } + // TODO: The WeightedTokensBuilder is slated for removal after the SparseVectorQueryBuilder is available. + // The logic to create a Boolean query based on weighted tokens will remain and/or be moved to server. @Override public List> getQueries() { return List.of( diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java index fb7d828092a36..cded9b8dce5e2 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java @@ -75,7 +75,7 @@ private WeightedTokensQueryBuilder createTestQueryBuilder(boolean onlyScorePrune @Override protected Collection> getPlugins() { - return List.of(MapperExtrasPlugin.class, XPackClientPlugin.class); + return List.of(XPackClientPlugin.class, MapperExtrasPlugin.class); } @Override diff --git a/x-pack/plugin/inference/build.gradle b/x-pack/plugin/inference/build.gradle index 760e8d33fa113..1d0236a5834e5 100644 --- a/x-pack/plugin/inference/build.gradle +++ b/x-pack/plugin/inference/build.gradle @@ -34,7 +34,6 @@ dependencies { testImplementation(testArtifact(project(':server'))) testImplementation(project(':x-pack:plugin:inference:qa:test-service-plugin')) testImplementation project(':modules:reindex') - testImplementation project(':modules:mapper-extras') clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin') api "com.ibm.icu:icu4j:${versions.icu4j}" diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java index 060da2d84cc5d..c7b3a9d42f579 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java @@ -17,15 +17,11 @@ import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.update.UpdateRequestBuilder; import org.elasticsearch.cluster.metadata.IndexMetadata; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.test.ESIntegTestCase; -import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xpack.inference.LocalStateInferencePlugin; import org.elasticsearch.xpack.inference.Utils; import org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension; @@ -34,6 +30,7 @@ import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Locale; @@ -44,9 +41,8 @@ import static org.hamcrest.Matchers.equalTo; public class ShardBulkInferenceActionFilterIT extends ESIntegTestCase { - public static final String INDEX_NAME = "test-index"; - private IndexVersion indexVersion; + public static final String INDEX_NAME = "test-index"; @Before public void setup() throws Exception { @@ -66,19 +62,8 @@ protected Collection> nodePlugins() { return Arrays.asList(LocalStateInferencePlugin.class); } - @Override - public Settings indexSettings() { - return Settings.builder() - .put(IndexMetadata.SETTING_VERSION_CREATED, indexVersion) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 10)) - .build(); - } - public void testBulkOperations() throws Exception { - this.indexVersion = randomFrom( - IndexVersionUtils.randomPreviousCompatibleVersion(random(), IndexVersions.INFERENCE_METADATA_FIELDS), - IndexVersionUtils.randomVersionBetween(random(), IndexVersions.INFERENCE_METADATA_FIELDS, IndexVersion.current()) - ); + Map shardsSettings = Collections.singletonMap(IndexMetadata.SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 10)); indicesAdmin().prepareCreate(INDEX_NAME) .setMapping( String.format( @@ -101,6 +86,7 @@ public void testBulkOperations() throws Exception { TestDenseInferenceServiceExtension.TestInferenceService.NAME ) ) + .setSettings(shardsSettings) .get(); int totalBulkReqs = randomIntBetween(2, 100); @@ -166,4 +152,5 @@ public void testBulkOperations() throws Exception { searchResponse.decRef(); } } + } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index 2eeb237494948..93743a5485c2c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -25,7 +25,6 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.mapper.Mapper; -import org.elasticsearch.index.mapper.MetadataFieldMapper; import org.elasticsearch.indices.SystemIndexDescriptor; import org.elasticsearch.inference.InferenceServiceExtension; import org.elasticsearch.inference.InferenceServiceRegistry; @@ -78,7 +77,6 @@ import org.elasticsearch.xpack.inference.highlight.SemanticTextHighlighter; import org.elasticsearch.xpack.inference.logging.ThrottlerManager; import org.elasticsearch.xpack.inference.mapper.OffsetSourceFieldMapper; -import org.elasticsearch.xpack.inference.mapper.SemanticInferenceMetadataFieldsMapper; import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; import org.elasticsearch.xpack.inference.queries.SemanticMatchQueryRewriteInterceptor; import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; @@ -287,7 +285,7 @@ public Collection createComponents(PluginServices services) { } inferenceServiceRegistry.set(registry); - var actionFilter = new ShardBulkInferenceActionFilter(services.clusterService(), registry, modelRegistry); + var actionFilter = new ShardBulkInferenceActionFilter(registry, modelRegistry); shardBulkInferenceActionFilter.set(actionFilter); var meterRegistry = services.telemetryProvider().getMeterRegistry(); @@ -421,11 +419,6 @@ public void close() { IOUtils.closeWhileHandlingException(inferenceServiceRegistry.get(), throttlerToClose); } - @Override - public Map getMetadataMappers() { - return Map.of(SemanticInferenceMetadataFieldsMapper.NAME, SemanticInferenceMetadataFieldsMapper.PARSER); - } - @Override public Map getMappers() { return Map.of( diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index b76a39a0f2ac2..a9195ea24af3a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -24,14 +24,11 @@ import org.elasticsearch.action.support.RefCountingRunnable; import org.elasticsearch.action.update.UpdateRequest; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; -import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.util.concurrent.AtomicArray; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.TimeValue; -import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.inference.ChunkedInference; import org.elasticsearch.inference.InferenceService; import org.elasticsearch.inference.InferenceServiceRegistry; @@ -43,11 +40,11 @@ import org.elasticsearch.xpack.core.inference.results.ChunkedInferenceError; import org.elasticsearch.xpack.inference.mapper.SemanticTextField; import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; -import org.elasticsearch.xpack.inference.mapper.SemanticTextUtils; import org.elasticsearch.xpack.inference.registry.ModelRegistry; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -56,6 +53,8 @@ import java.util.Map; import java.util.stream.Collectors; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.toSemanticTextFieldChunks; + /** * A {@link MappedActionFilter} that intercepts {@link BulkShardRequest} to apply inference on fields specified * as {@link SemanticTextFieldMapper} in the index mapping. For each semantic text field referencing fields in @@ -69,26 +68,15 @@ public class ShardBulkInferenceActionFilter implements MappedActionFilter { protected static final int DEFAULT_BATCH_SIZE = 512; - private final ClusterService clusterService; private final InferenceServiceRegistry inferenceServiceRegistry; private final ModelRegistry modelRegistry; private final int batchSize; - public ShardBulkInferenceActionFilter( - ClusterService clusterService, - InferenceServiceRegistry inferenceServiceRegistry, - ModelRegistry modelRegistry - ) { - this(clusterService, inferenceServiceRegistry, modelRegistry, DEFAULT_BATCH_SIZE); + public ShardBulkInferenceActionFilter(InferenceServiceRegistry inferenceServiceRegistry, ModelRegistry modelRegistry) { + this(inferenceServiceRegistry, modelRegistry, DEFAULT_BATCH_SIZE); } - public ShardBulkInferenceActionFilter( - ClusterService clusterService, - InferenceServiceRegistry inferenceServiceRegistry, - ModelRegistry modelRegistry, - int batchSize - ) { - this.clusterService = clusterService; + public ShardBulkInferenceActionFilter(InferenceServiceRegistry inferenceServiceRegistry, ModelRegistry modelRegistry, int batchSize) { this.inferenceServiceRegistry = inferenceServiceRegistry; this.modelRegistry = modelRegistry; this.batchSize = batchSize; @@ -124,8 +112,7 @@ private void processBulkShardRequest( BulkShardRequest bulkShardRequest, Runnable onCompletion ) { - var index = clusterService.state().getMetadata().index(bulkShardRequest.index()); - new AsyncBulkShardInferenceAction(index.getCreationVersion(), fieldInferenceMap, bulkShardRequest, onCompletion).run(); + new AsyncBulkShardInferenceAction(fieldInferenceMap, bulkShardRequest, onCompletion).run(); } private record InferenceProvider(InferenceService service, Model model) {} @@ -134,29 +121,26 @@ private record InferenceProvider(InferenceService service, Model model) {} * A field inference request on a single input. * @param index The index of the request in the original bulk request. * @param field The target field. - * @param sourceField The source field. * @param input The input to run inference on. * @param inputOrder The original order of the input. - * @param offsetAdjustment The adjustment to apply to the chunk text offsets. + * @param isOriginalFieldInput Whether the input is part of the original values of the field. */ - private record FieldInferenceRequest(int index, String field, String sourceField, String input, int inputOrder, int offsetAdjustment) {} + private record FieldInferenceRequest(int index, String field, String input, int inputOrder, boolean isOriginalFieldInput) {} /** * The field inference response. * @param field The target field. - * @param sourceField The input that was used to run inference. * @param input The input that was used to run inference. * @param inputOrder The original order of the input. - * @param offsetAdjustment The adjustment to apply to the chunk text offsets. + * @param isOriginalFieldInput Whether the input is part of the original values of the field. * @param model The model used to run inference. * @param chunkedResults The actual results. */ private record FieldInferenceResponse( String field, - String sourceField, String input, int inputOrder, - int offsetAdjustment, + boolean isOriginalFieldInput, Model model, ChunkedInference chunkedResults ) {} @@ -181,19 +165,16 @@ void addFailure(Exception exc) { } private class AsyncBulkShardInferenceAction implements Runnable { - private final IndexVersion indexCreatedVersion; private final Map fieldInferenceMap; private final BulkShardRequest bulkShardRequest; private final Runnable onCompletion; private final AtomicArray inferenceResults; private AsyncBulkShardInferenceAction( - IndexVersion indexCreatedVersion, Map fieldInferenceMap, BulkShardRequest bulkShardRequest, Runnable onCompletion ) { - this.indexCreatedVersion = indexCreatedVersion; this.fieldInferenceMap = fieldInferenceMap; this.bulkShardRequest = bulkShardRequest; this.inferenceResults = new AtomicArray<>(bulkShardRequest.items().length); @@ -314,10 +295,9 @@ public void onResponse(List results) { acc.addOrUpdateResponse( new FieldInferenceResponse( request.field(), - request.sourceField(), request.input(), request.inputOrder(), - request.offsetAdjustment(), + request.isOriginalFieldInput(), inferenceProvider.model, result ) @@ -377,7 +357,8 @@ private void addInferenceResponseFailure(int id, Exception failure) { /** * Applies the {@link FieldInferenceResponseAccumulator} to the provided {@link BulkItemRequest}. * If the response contains failures, the bulk item request is marked as failed for the downstream action. - * Otherwise, the source of the request is augmented with the field inference results. + * Otherwise, the source of the request is augmented with the field inference results under the + * {@link SemanticTextField#INFERENCE_FIELD} field. */ private void applyInferenceResponses(BulkItemRequest item, FieldInferenceResponseAccumulator response) throws IOException { if (response.failures().isEmpty() == false) { @@ -389,50 +370,25 @@ private void applyInferenceResponses(BulkItemRequest item, FieldInferenceRespons final IndexRequest indexRequest = getIndexRequestOrNull(item.request()); var newDocMap = indexRequest.sourceAsMap(); - Map inferenceFieldsMap = new HashMap<>(); - final boolean addMetadataField = InferenceMetadataFieldsMapper.isEnabled(indexCreatedVersion); for (var entry : response.responses.entrySet()) { var fieldName = entry.getKey(); var responses = entry.getValue(); var model = responses.get(0).model(); // ensure that the order in the original field is consistent in case of multiple inputs Collections.sort(responses, Comparator.comparingInt(FieldInferenceResponse::inputOrder)); - Map> chunkMap = new LinkedHashMap<>(); - for (var resp : responses) { - var lst = chunkMap.computeIfAbsent(resp.sourceField, k -> new ArrayList<>()); - lst.addAll( - SemanticTextField.toSemanticTextFieldChunks( - resp.input, - resp.offsetAdjustment, - resp.chunkedResults, - indexRequest.getContentType(), - addMetadataField - ) - ); - } - List inputs = responses.stream() - .filter(r -> r.sourceField().equals(fieldName)) - .map(r -> r.input) - .collect(Collectors.toList()); + List inputs = responses.stream().filter(r -> r.isOriginalFieldInput).map(r -> r.input).collect(Collectors.toList()); + List results = responses.stream().map(r -> r.chunkedResults).collect(Collectors.toList()); var result = new SemanticTextField( - indexCreatedVersion, fieldName, - addMetadataField ? null : inputs, + inputs, new SemanticTextField.InferenceResult( model.getInferenceEntityId(), new SemanticTextField.ModelSettings(model), - chunkMap + toSemanticTextFieldChunks(results, indexRequest.getContentType()) ), indexRequest.getContentType() ); - if (addMetadataField) { - inferenceFieldsMap.put(fieldName, result); - } else { - SemanticTextUtils.insertValue(fieldName, newDocMap, result); - } - } - if (addMetadataField) { - newDocMap.put(InferenceMetadataFieldsMapper.NAME, inferenceFieldsMap); + SemanticTextFieldMapper.insertValue(fieldName, newDocMap, result); } indexRequest.source(newDocMap, indexRequest.getContentType()); } @@ -447,8 +403,6 @@ private void applyInferenceResponses(BulkItemRequest item, FieldInferenceRespons * TODO: We should validate the settings for pre-existing results here and apply the inference only if they differ? */ private Map> createFieldInferenceRequests(BulkShardRequest bulkShardRequest) { - final boolean useInferenceMetadataFieldsFormat = InferenceMetadataFieldsMapper.isEnabled(indexCreatedVersion); - Map> fieldRequestsMap = new LinkedHashMap<>(); for (int itemIndex = 0; itemIndex < bulkShardRequest.items().length; itemIndex++) { var item = bulkShardRequest.items()[itemIndex]; @@ -483,30 +437,17 @@ private Map> createFieldInferenceRequests(Bu for (var entry : fieldInferenceMap.values()) { String field = entry.getName(); String inferenceId = entry.getInferenceId(); - - if (useInferenceMetadataFieldsFormat) { - var inferenceMetadataFieldsValue = XContentMapValues.extractValue( - InferenceMetadataFieldsMapper.NAME + "." + field, - docMap - ); - if (inferenceMetadataFieldsValue != null) { - // Inference has already been computed - continue; - } - } else { - var originalFieldValue = XContentMapValues.extractValue(field, docMap); - if (originalFieldValue instanceof Map || (originalFieldValue == null && entry.getSourceFields().length == 1)) { - // Inference has already been computed, or there is no inference required. - continue; - } + var originalFieldValue = XContentMapValues.extractValue(field, docMap); + if (originalFieldValue instanceof Map || (originalFieldValue == null && entry.getSourceFields().length == 1)) { + // Inference has already been computed, or there is no inference required. + continue; } - int order = 0; for (var sourceField : entry.getSourceFields()) { - // TODO: Detect when the field is provided with an explicit null value + boolean isOriginalFieldInput = sourceField.equals(field); var valueObj = XContentMapValues.extractValue(sourceField, docMap); if (valueObj == null) { - if (isUpdateRequest && (useInferenceMetadataFieldsFormat == false)) { + if (isUpdateRequest) { addInferenceResponseFailure( item.id(), new ElasticsearchStatusException( @@ -523,21 +464,14 @@ private Map> createFieldInferenceRequests(Bu ensureResponseAccumulatorSlot(itemIndex); final List values; try { - values = SemanticTextUtils.nodeStringValues(field, valueObj); + values = nodeStringValues(field, valueObj); } catch (Exception exc) { addInferenceResponseFailure(item.id(), exc); break; } - List fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>()); - int offsetAdjustment = 0; - for (String v : values) { - fieldRequests.add(new FieldInferenceRequest(itemIndex, field, sourceField, v, order++, offsetAdjustment)); - - // When using the inference metadata fields format, all the input values are concatenated so that the - // chunk text offsets are expressed in the context of a single string. Calculate the offset adjustment - // to apply to account for this. - offsetAdjustment += v.length() + 1; // Add one for separator char length + for (var v : values) { + fieldRequests.add(new FieldInferenceRequest(itemIndex, field, v, order++, isOriginalFieldInput)); } } } @@ -546,6 +480,41 @@ private Map> createFieldInferenceRequests(Bu } } + /** + * This method converts the given {@code valueObj} into a list of strings. + * If {@code valueObj} is not a string or a collection of strings, it throws an ElasticsearchStatusException. + */ + private static List nodeStringValues(String field, Object valueObj) { + if (valueObj instanceof Number || valueObj instanceof Boolean) { + return List.of(valueObj.toString()); + } else if (valueObj instanceof String value) { + return List.of(value); + } else if (valueObj instanceof Collection values) { + List valuesString = new ArrayList<>(); + for (var v : values) { + if (v instanceof Number || v instanceof Boolean) { + valuesString.add(v.toString()); + } else if (v instanceof String value) { + valuesString.add(value); + } else { + throw new ElasticsearchStatusException( + "Invalid format for field [{}], expected [String] got [{}]", + RestStatus.BAD_REQUEST, + field, + valueObj.getClass().getSimpleName() + ); + } + } + return valuesString; + } + throw new ElasticsearchStatusException( + "Invalid format for field [{}], expected [String] got [{}]", + RestStatus.BAD_REQUEST, + field, + valueObj.getClass().getSimpleName() + ); + } + static IndexRequest getIndexRequestOrNull(DocWriteRequest docWriteRequest) { if (docWriteRequest instanceof IndexRequest indexRequest) { return indexRequest; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java index e38abf843acee..f2bfa72ec617a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java @@ -22,7 +22,6 @@ import org.apache.lucene.search.Weight; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.support.XContentMapValues; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SparseVectorFieldType; @@ -54,9 +53,8 @@ private record OffsetAndScore(int offset, float score) {} @Override public boolean canHighlight(MappedFieldType fieldType) { - if (fieldType instanceof SemanticTextFieldMapper.SemanticTextFieldType semanticTextFieldType) { - // TODO: Implement highlighting when using inference metadata fields - return InferenceMetadataFieldsMapper.isEnabled(semanticTextFieldType.getIndexVersionCreated()) == false; + if (fieldType instanceof SemanticTextFieldMapper.SemanticTextFieldType) { + return true; } return false; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticInferenceMetadataFieldsMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticInferenceMetadataFieldsMapper.java deleted file mode 100644 index cd37c06adb36c..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticInferenceMetadataFieldsMapper.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.mapper; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.join.BitSetProducer; -import org.elasticsearch.common.xcontent.XContentParserUtils; -import org.elasticsearch.index.mapper.ContentPath; -import org.elasticsearch.index.mapper.DocumentParserContext; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.MappingLookup; -import org.elasticsearch.index.mapper.ValueFetcher; -import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.search.fetch.StoredFieldsSpec; -import org.elasticsearch.search.lookup.Source; -import org.elasticsearch.xcontent.XContentLocation; -import org.elasticsearch.xcontent.XContentParser; - -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.Function; - -/** - * An {@link InferenceMetadataFieldsMapper} that delegates parsing of underlying fields - * to the corresponding {@link SemanticTextFieldMapper}. - */ -public class SemanticInferenceMetadataFieldsMapper extends InferenceMetadataFieldsMapper { - private static final SemanticInferenceMetadataFieldsMapper INSTANCE = new SemanticInferenceMetadataFieldsMapper(); - - public static final TypeParser PARSER = new FixedTypeParser( - c -> InferenceMetadataFieldsMapper.isEnabled(c.indexVersionCreated()) ? INSTANCE : null - ); - - static class FieldType extends InferenceMetadataFieldType { - private static final FieldType INSTANCE = new FieldType(); - - FieldType() { - super(); - } - - @Override - public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - if (InferenceMetadataFieldsMapper.isEnabled(context.getIndexSettings().getIndexVersionCreated()) == false) { - return ValueFetcher.EMPTY; - } - return valueFetcher(context.getMappingLookup(), context::bitsetFilter, context.searcher()); - } - - @Override - public ValueFetcher valueFetcher(MappingLookup mappingLookup, Function bitSetCache, IndexSearcher searcher) { - Map fieldFetchers = new HashMap<>(); - for (var inferenceField : mappingLookup.inferenceFields().keySet()) { - MappedFieldType ft = mappingLookup.getFieldType(inferenceField); - if (ft instanceof SemanticTextFieldMapper.SemanticTextFieldType semanticTextFieldType) { - fieldFetchers.put(inferenceField, semanticTextFieldType.valueFetcherWithInferenceResults(bitSetCache, searcher)); - } else { - throw new IllegalArgumentException("Illegal format for field [" + name() + "], got " + ft.typeName()); - } - } - if (fieldFetchers.isEmpty()) { - return ValueFetcher.EMPTY; - } - return new ValueFetcher() { - @Override - public void setNextReader(LeafReaderContext context) { - fieldFetchers.values().forEach(f -> f.setNextReader(context)); - } - - @Override - public List fetchValues(Source source, int doc, List ignoredValues) throws IOException { - Map result = new HashMap<>(); - for (var entry : fieldFetchers.entrySet()) { - var values = entry.getValue().fetchValues(source, doc, ignoredValues); - if (values.size() > 0) { - assert values.size() == 1; - result.put(entry.getKey(), values.get(0)); - } - } - return result.isEmpty() ? List.of() : List.of(result); - } - - @Override - public StoredFieldsSpec storedFieldsSpec() { - return StoredFieldsSpec.NO_REQUIREMENTS; - } - }; - } - - @Override - public String typeName() { - return CONTENT_TYPE; - } - - @Override - public Query termQuery(Object value, SearchExecutionContext context) { - throw new QueryShardException( - context, - "[" + name() + "] field which is of type [" + typeName() + "], does not support term queries" - ); - } - } - - private SemanticInferenceMetadataFieldsMapper() { - super(FieldType.INSTANCE); - } - - @Override - protected String contentType() { - return CONTENT_TYPE; - } - - @Override - protected boolean supportsParsingObject() { - return true; - } - - @Override - protected void parseCreateField(DocumentParserContext context) throws IOException { - final boolean isWithinLeaf = context.path().isWithinLeafObject(); - try { - // make sure that we don't expand dots in field names while parsing - context.path().setWithinLeafObject(true); - XContentParser parser = context.parser(); - XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); - while (parser.nextToken() != XContentParser.Token.END_OBJECT) { - XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); - String fieldName = parser.currentName(); - - // Set the path to that of semantic text field so the parser acts as if we are parsing the semantic text field value - // directly. We can safely split on all "." chars because semantic text fields cannot be used when subobjects == false. - String[] fieldNameParts = fieldName.split("\\."); - setPath(context.path(), fieldNameParts); - - var parent = context.parent().findParentMapper(fieldName); - if (parent == null) { - throw new IllegalArgumentException("Field [" + fieldName + "] does not have a parent mapper"); - } - String suffix = parent != context.parent() ? fieldName.substring(parent.fullPath().length() + 1) : fieldName; - var mapper = parent.getMapper(suffix); - if (mapper instanceof SemanticTextFieldMapper fieldMapper) { - XContentLocation xContentLocation = context.parser().getTokenLocation(); - var input = fieldMapper.parseSemanticTextField(context); - if (input != null) { - fieldMapper.parseCreateFieldFromContext(context, input, xContentLocation); - } - } else { - throw new IllegalArgumentException( - "Field [" + fieldName + "] is not a [" + SemanticTextFieldMapper.CONTENT_TYPE + "] field" - ); - } - } - } finally { - context.path().setWithinLeafObject(isWithinLeaf); - setPath(context.path(), new String[] { InferenceMetadataFieldsMapper.NAME }); - } - } - - private static void setPath(ContentPath contentPath, String[] newPath) { - while (contentPath.length() > 0) { - contentPath.remove(); - } - - for (String pathPart : newPath) { - contentPath.add(pathPart); - } - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java index 5f63d65ae5062..d651729dee259 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java @@ -10,12 +10,8 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; -import org.elasticsearch.common.xcontent.XContentParserUtils; import org.elasticsearch.common.xcontent.support.XContentMapValues; -import org.elasticsearch.core.Nullable; -import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; +import org.elasticsearch.core.Tuple; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.ChunkedInference; import org.elasticsearch.inference.Model; @@ -35,9 +31,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -53,18 +46,13 @@ * the inference results under the {@link SemanticTextField#INFERENCE_FIELD}. * * @param fieldName The original field name. - * @param originalValues The original values associated with the field name for indices created before - * {@link IndexVersions#INFERENCE_METADATA_FIELDS}, null otherwise. + * @param originalValues The original values associated with the field name. * @param inference The inference result. * @param contentType The {@link XContentType} used to store the embeddings chunks. */ -public record SemanticTextField( - IndexVersion indexCreatedVersion, - String fieldName, - @Nullable List originalValues, - InferenceResult inference, - XContentType contentType -) implements ToXContentObject { +public record SemanticTextField(String fieldName, List originalValues, InferenceResult inference, XContentType contentType) + implements + ToXContentObject { static final String TEXT_FIELD = "text"; static final String INFERENCE_FIELD = "inference"; @@ -73,20 +61,15 @@ public record SemanticTextField( static final String CHUNKS_FIELD = "chunks"; static final String CHUNKED_EMBEDDINGS_FIELD = "embeddings"; public static final String CHUNKED_TEXT_FIELD = "text"; - static final String CHUNKED_OFFSET_FIELD = "offset"; - static final String CHUNKED_START_OFFSET_FIELD = "start_offset"; - static final String CHUNKED_END_OFFSET_FIELD = "end_offset"; static final String MODEL_SETTINGS_FIELD = "model_settings"; static final String TASK_TYPE_FIELD = "task_type"; static final String DIMENSIONS_FIELD = "dimensions"; static final String SIMILARITY_FIELD = "similarity"; static final String ELEMENT_TYPE_FIELD = "element_type"; - public record InferenceResult(String inferenceId, ModelSettings modelSettings, Map> chunks) {} + public record InferenceResult(String inferenceId, ModelSettings modelSettings, List chunks) {} - public record Chunk(@Nullable String text, int startOffset, int endOffset, BytesReference rawEmbeddings) {} - - public record Offset(String sourceFieldName, int startOffset, int endOffset) {} + record Chunk(String text, BytesReference rawEmbeddings) {} public record ModelSettings( TaskType taskType, @@ -204,14 +187,12 @@ public static String getEmbeddingsFieldName(String fieldName) { return getChunksFieldName(fieldName) + "." + CHUNKED_EMBEDDINGS_FIELD; } - public static String getOffsetsFieldName(String fieldName) { - return getChunksFieldName(fieldName) + "." + CHUNKED_OFFSET_FIELD; + static SemanticTextField parse(XContentParser parser, Tuple context) throws IOException { + return SEMANTIC_TEXT_FIELD_PARSER.parse(parser, context); } - record ParserContext(IndexVersion indexVersionCreated, String fieldName, XContentType xContentType) {} - - static SemanticTextField parse(XContentParser parser, ParserContext context) throws IOException { - return SEMANTIC_TEXT_FIELD_PARSER.parse(parser, context); + static ModelSettings parseModelSettings(XContentParser parser) throws IOException { + return MODEL_SETTINGS_PARSER.parse(parser, null); } static ModelSettings parseModelSettingsFromMap(Object node) { @@ -226,104 +207,63 @@ static ModelSettings parseModelSettingsFromMap(Object node) { map, XContentType.JSON ); - return MODEL_SETTINGS_PARSER.parse(parser, null); + return parseModelSettings(parser); } catch (Exception exc) { throw new ElasticsearchException(exc); } } - @Override - public List originalValues() { - return originalValues != null ? originalValues : Collections.emptyList(); - } - @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - final boolean useInferenceMetadataFieldsFormat = InferenceMetadataFieldsMapper.isEnabled(indexCreatedVersion); - builder.startObject(); - List originalValues = originalValues(); - if (useInferenceMetadataFieldsFormat == false && originalValues.isEmpty() == false) { + if (originalValues.isEmpty() == false) { builder.field(TEXT_FIELD, originalValues.size() == 1 ? originalValues.get(0) : originalValues); } builder.startObject(INFERENCE_FIELD); builder.field(INFERENCE_ID_FIELD, inference.inferenceId); builder.field(MODEL_SETTINGS_FIELD, inference.modelSettings); - if (useInferenceMetadataFieldsFormat) { - builder.startObject(CHUNKS_FIELD); - } else { - builder.startArray(CHUNKS_FIELD); - } - for (var entry : inference.chunks.entrySet()) { - if (useInferenceMetadataFieldsFormat) { - builder.startArray(entry.getKey()); - } - for (var chunk : entry.getValue()) { - builder.startObject(); - if (useInferenceMetadataFieldsFormat) { - builder.field(CHUNKED_START_OFFSET_FIELD, chunk.startOffset); - builder.field(CHUNKED_END_OFFSET_FIELD, chunk.endOffset); - } else { - builder.field(TEXT_FIELD, chunk.text); - } - XContentParser parser = XContentHelper.createParserNotCompressed( - XContentParserConfiguration.EMPTY, - chunk.rawEmbeddings, - contentType - ); - builder.field(CHUNKED_EMBEDDINGS_FIELD).copyCurrentStructure(parser); - builder.endObject(); - } - if (useInferenceMetadataFieldsFormat) { - builder.endArray(); - } - } - if (useInferenceMetadataFieldsFormat) { + builder.startArray(CHUNKS_FIELD); + for (var chunk : inference.chunks) { + builder.startObject(); + builder.field(CHUNKED_TEXT_FIELD, chunk.text); + XContentParser parser = XContentHelper.createParserNotCompressed( + XContentParserConfiguration.EMPTY, + chunk.rawEmbeddings, + contentType + ); + builder.field(CHUNKED_EMBEDDINGS_FIELD).copyCurrentStructure(parser); builder.endObject(); - } else { - builder.endArray(); } + builder.endArray(); builder.endObject(); builder.endObject(); return builder; } @SuppressWarnings("unchecked") - private static final ConstructingObjectParser SEMANTIC_TEXT_FIELD_PARSER = - new ConstructingObjectParser<>(SemanticTextFieldMapper.CONTENT_TYPE, true, (args, context) -> { - List originalValues = (List) args[0]; - if (InferenceMetadataFieldsMapper.isEnabled(context.indexVersionCreated)) { - if (originalValues != null && originalValues.isEmpty() == false) { - throw new IllegalArgumentException("Unknown field [" + TEXT_FIELD + "]"); - } - originalValues = null; - } - return new SemanticTextField( - context.indexVersionCreated(), - context.fieldName(), - originalValues, + private static final ConstructingObjectParser> SEMANTIC_TEXT_FIELD_PARSER = + new ConstructingObjectParser<>( + SemanticTextFieldMapper.CONTENT_TYPE, + true, + (args, context) -> new SemanticTextField( + context.v1(), + (List) (args[0] == null ? List.of() : args[0]), (InferenceResult) args[1], - context.xContentType() - ); - }); + context.v2() + ) + ); @SuppressWarnings("unchecked") - private static final ConstructingObjectParser INFERENCE_RESULT_PARSER = new ConstructingObjectParser<>( + private static final ConstructingObjectParser INFERENCE_RESULT_PARSER = new ConstructingObjectParser<>( INFERENCE_FIELD, true, - args -> new InferenceResult((String) args[0], (ModelSettings) args[1], (Map>) args[2]) + args -> new InferenceResult((String) args[0], (ModelSettings) args[1], (List) args[2]) ); - private static final ConstructingObjectParser CHUNKS_PARSER = new ConstructingObjectParser<>( + private static final ConstructingObjectParser CHUNKS_PARSER = new ConstructingObjectParser<>( CHUNKS_FIELD, true, - (args, context) -> { - String text = (String) args[0]; - if (InferenceMetadataFieldsMapper.isEnabled(context.indexVersionCreated) == false && text == null) { - throw new IllegalArgumentException("Missing chunk text"); - } - return new Chunk(text, args[1] != null ? (int) args[1] : -1, args[2] != null ? (int) args[2] : -1, (BytesReference) args[3]); - } + args -> new Chunk((String) args[0], (BytesReference) args[1]) ); private static final ConstructingObjectParser MODEL_SETTINGS_PARSER = new ConstructingObjectParser<>( @@ -344,27 +284,15 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws SEMANTIC_TEXT_FIELD_PARSER.declareStringArray(optionalConstructorArg(), new ParseField(TEXT_FIELD)); SEMANTIC_TEXT_FIELD_PARSER.declareObject( constructorArg(), - (p, c) -> INFERENCE_RESULT_PARSER.parse(p, c), + (p, c) -> INFERENCE_RESULT_PARSER.parse(p, null), new ParseField(INFERENCE_FIELD) ); INFERENCE_RESULT_PARSER.declareString(constructorArg(), new ParseField(INFERENCE_ID_FIELD)); - INFERENCE_RESULT_PARSER.declareObject( - constructorArg(), - (p, c) -> MODEL_SETTINGS_PARSER.parse(p, null), - new ParseField(MODEL_SETTINGS_FIELD) - ); - INFERENCE_RESULT_PARSER.declareField(constructorArg(), (p, c) -> { - if (InferenceMetadataFieldsMapper.isEnabled(c.indexVersionCreated)) { - return parseChunksMap(p, c); - } else { - return Map.of(c.fieldName, parseChunksArrayLegacy(p, c)); - } - }, new ParseField(CHUNKS_FIELD), ObjectParser.ValueType.OBJECT_ARRAY); + INFERENCE_RESULT_PARSER.declareObject(constructorArg(), MODEL_SETTINGS_PARSER, new ParseField(MODEL_SETTINGS_FIELD)); + INFERENCE_RESULT_PARSER.declareObjectArray(constructorArg(), CHUNKS_PARSER, new ParseField(CHUNKS_FIELD)); - CHUNKS_PARSER.declareString(optionalConstructorArg(), new ParseField(TEXT_FIELD)); - CHUNKS_PARSER.declareInt(optionalConstructorArg(), new ParseField(CHUNKED_START_OFFSET_FIELD)); - CHUNKS_PARSER.declareInt(optionalConstructorArg(), new ParseField(CHUNKED_END_OFFSET_FIELD)); + CHUNKS_PARSER.declareString(constructorArg(), new ParseField(CHUNKED_TEXT_FIELD)); CHUNKS_PARSER.declareField(constructorArg(), (p, c) -> { XContentBuilder b = XContentBuilder.builder(p.contentType().xContent()); b.copyCurrentStructure(p); @@ -377,63 +305,18 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws MODEL_SETTINGS_PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), new ParseField(ELEMENT_TYPE_FIELD)); } - private static Map> parseChunksMap(XContentParser parser, ParserContext context) throws IOException { - Map> resultMap = new LinkedHashMap<>(); - XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); - while (parser.nextToken() != XContentParser.Token.END_OBJECT) { - XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); - String fieldName = parser.currentName(); - XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.nextToken(), parser); - var chunks = resultMap.computeIfAbsent(fieldName, k -> new ArrayList<>()); - while (parser.nextToken() != XContentParser.Token.END_ARRAY) { - chunks.add(CHUNKS_PARSER.parse(parser, context)); - } - } - return resultMap; - } - - private static List parseChunksArrayLegacy(XContentParser parser, ParserContext context) throws IOException { - List results = new ArrayList<>(); - XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.currentToken(), parser); - while (parser.nextToken() != XContentParser.Token.END_ARRAY) { - results.add(CHUNKS_PARSER.parse(parser, context)); - } - return results; - } - /** * Converts the provided {@link ChunkedInference} into a list of {@link Chunk}. */ - public static List toSemanticTextFieldChunks( - String input, - int offsetAdjustment, - ChunkedInference results, - XContentType contentType, - boolean useInferenceMetadataFieldsFormat - ) throws IOException { + public static List toSemanticTextFieldChunks(List results, XContentType contentType) throws IOException { List chunks = new ArrayList<>(); - Iterator it = results.chunksAsMatchedTextAndByteReference(contentType.xContent()); - while (it.hasNext()) { - chunks.add(toSemanticTextFieldChunk(input, offsetAdjustment, it.next(), useInferenceMetadataFieldsFormat)); + for (var result : results) { + for (var it = result.chunksAsMatchedTextAndByteReference(contentType.xContent()); it.hasNext();) { + var chunkAsByteReference = it.next(); + chunks.add(new Chunk(chunkAsByteReference.matchedText(), chunkAsByteReference.bytesReference())); + } } return chunks; } - public static Chunk toSemanticTextFieldChunk( - String input, - int offsetAdjustment, - ChunkedInference.Chunk chunk, - boolean useInferenceMetadataFieldsFormat - ) { - // TODO: Use offsets from ChunkedInferenceServiceResults - // TODO: When using legacy semantic text format, build chunk text from offsets - assert chunk.matchedText() != null; // TODO: Remove once offsets are available from chunk - int startOffset = useInferenceMetadataFieldsFormat ? input.indexOf(chunk.matchedText()) + offsetAdjustment : -1; - return new Chunk( - useInferenceMetadataFieldsFormat ? null : chunk.matchedText(), - useInferenceMetadataFieldsFormat ? startOffset : -1, - useInferenceMetadataFieldsFormat ? startOffset + chunk.matchedText().length() : -1, - chunk.bytesReference() - ); - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index b695cf2ee2fb2..cda77233bdfd4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -8,23 +8,16 @@ package org.elasticsearch.xpack.inference.mapper; import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; import org.apache.lucene.search.join.BitSetProducer; import org.apache.lucene.search.join.ScoreMode; -import org.apache.lucene.util.BitSet; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; -import org.elasticsearch.common.xcontent.XContentParserUtils; -import org.elasticsearch.core.CheckedConsumer; +import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Tuple; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; @@ -36,7 +29,6 @@ import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.InferenceFieldMapper; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; @@ -46,7 +38,6 @@ import org.elasticsearch.index.mapper.NestedObjectMapper; import org.elasticsearch.index.mapper.ObjectMapper; import org.elasticsearch.index.mapper.SimpleMappedFieldType; -import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.index.mapper.SourceValueFetcher; import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.mapper.ValueFetcher; @@ -58,25 +49,19 @@ import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.inference.InferenceResults; import org.elasticsearch.inference.SimilarityMeasure; -import org.elasticsearch.search.fetch.StoredFieldsSpec; -import org.elasticsearch.search.lookup.Source; import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentLocation; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; -import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder; import java.io.IOException; -import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -86,7 +71,7 @@ import static org.elasticsearch.search.SearchService.DEFAULT_SIZE; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_OFFSET_FIELD; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_TEXT_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKS_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.INFERENCE_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.INFERENCE_ID_FIELD; @@ -95,7 +80,6 @@ import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.TEXT_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getChunksFieldName; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getEmbeddingsFieldName; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getOffsetsFieldName; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getOriginalTextFieldName; import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.DEFAULT_ELSER_ID; @@ -119,11 +103,12 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie private final IndexSettings indexSettings; public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, c::bitSetProducer, c.getIndexSettings()), + (n, c) -> new Builder(n, c.indexVersionCreated(), c::bitSetProducer, c.getIndexSettings()), List.of(notInMultiFields(CONTENT_TYPE), notFromDynamicTemplates(CONTENT_TYPE)) ); public static class Builder extends FieldMapper.Builder { + private final IndexVersion indexVersionCreated; private final IndexSettings indexSettings; private final Parameter inferenceId = Parameter.stringParam( @@ -167,17 +152,28 @@ public static class Builder extends FieldMapper.Builder { private Function inferenceFieldBuilder; public static Builder from(SemanticTextFieldMapper mapper) { - Builder builder = new Builder(mapper.leafName(), mapper.fieldType().getChunksField().bitsetProducer(), mapper.indexSettings); + Builder builder = new Builder( + mapper.leafName(), + mapper.fieldType().indexVersionCreated, + mapper.fieldType().getChunksField().bitsetProducer(), + mapper.indexSettings + ); builder.init(mapper); return builder; } - public Builder(String name, Function bitSetProducer, IndexSettings indexSettings) { + public Builder( + String name, + IndexVersion indexVersionCreated, + Function bitSetProducer, + IndexSettings indexSettings + ) { super(name); + this.indexVersionCreated = indexVersionCreated; this.indexSettings = indexSettings; this.inferenceFieldBuilder = c -> createInferenceField( c, - indexSettings.getIndexVersionCreated(), + indexVersionCreated, modelSettings.get(), bitSetProducer, indexSettings @@ -241,7 +237,7 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) { searchInferenceId.getValue(), modelSettings.getValue(), inferenceField, - indexSettings.getIndexVersionCreated(), + indexVersionCreated, meta.getValue() ), builderParams(this, context), @@ -293,48 +289,21 @@ public FieldMapper.Builder getMergeBuilder() { @Override protected void parseCreateField(DocumentParserContext context) throws IOException { - final XContentParser parser = context.parser(); - final XContentLocation xContentLocation = parser.getTokenLocation(); - - if (InferenceMetadataFieldsMapper.isEnabled(context.indexSettings().getIndexVersionCreated())) { - // Detect if field value is an object, which we don't support parsing - if (parser.currentToken() == XContentParser.Token.START_OBJECT) { - throw new DocumentParsingException( - xContentLocation, - "[" + CONTENT_TYPE + "] field [" + fullPath() + "] does not support object values" - ); - } - - // ignore the rest of the field value - parser.skipChildren(); - return; - } - - final SemanticTextField field = parseSemanticTextField(context); - if (field != null) { - parseCreateFieldFromContext(context, field, xContentLocation); - } - } - - SemanticTextField parseSemanticTextField(DocumentParserContext context) throws IOException { XContentParser parser = context.parser(); if (parser.currentToken() == XContentParser.Token.VALUE_NULL) { - return null; + return; } + + XContentLocation xContentLocation = parser.getTokenLocation(); + final SemanticTextField field; boolean isWithinLeaf = context.path().isWithinLeafObject(); try { context.path().setWithinLeafObject(true); - return SemanticTextField.parse( - context.parser(), - new SemanticTextField.ParserContext(indexSettings.getIndexVersionCreated(), fullPath(), context.parser().contentType()) - ); + field = SemanticTextField.parse(parser, new Tuple<>(fullPath(), context.parser().contentType())); } finally { context.path().setWithinLeafObject(isWithinLeaf); } - } - void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextField field, XContentLocation xContentLocation) - throws IOException { final String fullFieldName = fieldType().name(); if (field.inference().inferenceId().equals(fieldType().getInferenceId()) == false) { throw new DocumentParsingException( @@ -353,7 +322,12 @@ void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextFiel final SemanticTextFieldMapper mapper; if (fieldType().getModelSettings() == null) { context.path().remove(); - Builder builder = (Builder) new Builder(leafName(), fieldType().getChunksField().bitsetProducer(), indexSettings).init(this); + Builder builder = (Builder) new Builder( + leafName(), + fieldType().indexVersionCreated, + fieldType().getChunksField().bitsetProducer(), + indexSettings + ).init(this); try { mapper = builder.setModelSettings(field.inference().modelSettings()) .setInferenceId(field.inference().inferenceId()) @@ -383,42 +357,17 @@ void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextFiel var chunksField = mapper.fieldType().getChunksField(); var embeddingsField = mapper.fieldType().getEmbeddingsField(); - var offsetsField = mapper.fieldType().getOffsetsField(); - for (var entry : field.inference().chunks().entrySet()) { - for (var chunk : entry.getValue()) { - var nestedContext = context.createNestedContext(chunksField); - try ( - XContentParser subParser = XContentHelper.createParserNotCompressed( - XContentParserConfiguration.EMPTY, - chunk.rawEmbeddings(), - context.parser().contentType() - ) - ) { - DocumentParserContext subContext = nestedContext.switchParser(subParser); - subParser.nextToken(); - embeddingsField.parse(subContext); - } - - if (InferenceMetadataFieldsMapper.isEnabled(indexSettings.getIndexVersionCreated())) { - try (XContentBuilder builder = XContentFactory.contentBuilder(context.parser().contentType())) { - builder.startObject(); - builder.field("field", entry.getKey()); - builder.field("start", chunk.startOffset()); - builder.field("end", chunk.endOffset()); - builder.endObject(); - try ( - XContentParser subParser = XContentHelper.createParserNotCompressed( - XContentParserConfiguration.EMPTY, - BytesReference.bytes(builder), - context.parser().contentType() - ) - ) { - DocumentParserContext subContext = nestedContext.switchParser(subParser); - subParser.nextToken(); - offsetsField.parse(subContext); - } - } - } + for (var chunk : field.inference().chunks()) { + try ( + XContentParser subParser = XContentHelper.createParserNotCompressed( + XContentParserConfiguration.EMPTY, + chunk.rawEmbeddings(), + context.parser().contentType() + ) + ) { + DocumentParserContext subContext = context.createNestedContext(chunksField).switchParser(subParser); + subParser.nextToken(); + embeddingsField.parse(subContext); } } } @@ -441,6 +390,20 @@ public InferenceFieldMetadata getMetadata(Set sourcePaths) { return new InferenceFieldMetadata(fullPath(), fieldType().getInferenceId(), fieldType().getSearchInferenceId(), copyFields); } + @Override + public Object getOriginalValue(Map sourceAsMap) { + Object fieldValue = sourceAsMap.get(fullPath()); + if (fieldValue == null) { + return null; + } else if (fieldValue instanceof Map == false) { + // Don't try to further validate the non-map value, that will be handled when the source is fully parsed + return fieldValue; + } + + Map fieldValueMap = XContentMapValues.nodeMapValue(fieldValue, "Field [" + fullPath() + "]"); + return XContentMapValues.extractValue(TEXT_FIELD, fieldValueMap); + } + @Override protected void doValidate(MappingLookup mappers) { int parentPathIndex = fullPath().lastIndexOf(leafName()); @@ -513,10 +476,6 @@ public FieldMapper getEmbeddingsField() { return (FieldMapper) getChunksField().getMapper(CHUNKED_EMBEDDINGS_FIELD); } - public FieldMapper getOffsetsField() { - return (FieldMapper) getChunksField().getMapper(CHUNKED_OFFSET_FIELD); - } - @Override public Query termQuery(Object value, SearchExecutionContext context) { throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support term query"); @@ -539,31 +498,8 @@ public Query existsQuery(SearchExecutionContext context) { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - if (InferenceMetadataFieldsMapper.isEnabled(indexVersionCreated)) { - return SourceValueFetcher.toString(name(), context, null); - } else { - // Redirect the fetcher to load the original values of the field - return SourceValueFetcher.toString(getOriginalTextFieldName(name()), context, format); - } - } - - ValueFetcher valueFetcherWithInferenceResults(Function bitSetCache, IndexSearcher searcher) { - var embeddingsField = getEmbeddingsField(); - if (embeddingsField == null) { - return ValueFetcher.EMPTY; - } - try { - var embeddingsLoader = embeddingsField.syntheticFieldLoader(); - var bitSetFilter = bitSetCache.apply(getChunksField().parentTypeFilter()); - var childWeight = searcher.createWeight( - getChunksField().nestedTypeFilter(), - org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES, - 1 - ); - return new SemanticTextFieldValueFetcher(bitSetFilter, childWeight, embeddingsLoader); - } catch (IOException exc) { - throw new UncheckedIOException(exc); - } + // Redirect the fetcher to load the original values of the field + return SourceValueFetcher.toString(getOriginalTextFieldName(name()), context, format); } @Override @@ -686,129 +622,118 @@ private String generateInvalidQueryInferenceResultsMessage(StringBuilder baseMes @Override public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) { - String name = InferenceMetadataFieldsMapper.isEnabled(indexVersionCreated) ? name() : name().concat(".text"); - SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name)); + SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name().concat(".text"))); return new BlockSourceReader.BytesRefsBlockLoader(fetcher, BlockSourceReader.lookupMatchingAll()); } + } - public IndexVersion getIndexVersionCreated() { - return indexVersionCreated; + /** + *

+ * Insert or replace the path's value in the map with the provided new value. The map will be modified in-place. + * If the complete path does not exist in the map, it will be added to the deepest (sub-)map possible. + *

+ *

+ * For example, given the map: + *

+ *
+     * {
+     *   "path1": {
+     *     "path2": {
+     *       "key1": "value1"
+     *     }
+     *   }
+     * }
+     * 
+ *

+ * And the caller wanted to insert {@code "path1.path2.path3.key2": "value2"}, the method would emit the modified map: + *

+ *
+     * {
+     *   "path1": {
+     *     "path2": {
+     *       "key1": "value1",
+     *       "path3.key2": "value2"
+     *     }
+     *   }
+     * }
+     * 
+ * + * @param path the value's path in the map. + * @param map the map to search and modify in-place. + * @param newValue the new value to assign to the path. + * + * @throws IllegalArgumentException If either the path cannot be fully traversed or there is ambiguity about where to insert the new + * value. + */ + public static void insertValue(String path, Map map, Object newValue) { + String[] pathElements = path.split("\\."); + if (pathElements.length == 0) { + return; } - private class SemanticTextFieldValueFetcher implements ValueFetcher { - private final BitSetProducer parentBitSetProducer; - private final Weight childWeight; - private final SourceLoader.SyntheticFieldLoader fieldLoader; + List suffixMaps = extractSuffixMaps(pathElements, 0, map); + if (suffixMaps.isEmpty()) { + // This should never happen. Throw in case it does for some reason. + throw new IllegalStateException("extractSuffixMaps returned an empty suffix map list"); + } else if (suffixMaps.size() == 1) { + SuffixMap suffixMap = suffixMaps.getFirst(); + suffixMap.map().put(suffixMap.suffix(), newValue); + } else { + throw new IllegalArgumentException( + "Path [" + path + "] could be inserted in " + suffixMaps.size() + " distinct ways, it is ambiguous which one to use" + ); + } + } - private BitSet bitSet; - private Scorer childScorer; - private SourceLoader.SyntheticFieldLoader.DocValuesLoader dvLoader; - private OffsetSourceField.OffsetSourceLoader offsetsLoader; + private record SuffixMap(String suffix, Map map) {} - private SemanticTextFieldValueFetcher( - BitSetProducer bitSetProducer, - Weight childWeight, - SourceLoader.SyntheticFieldLoader fieldLoader - ) { - this.parentBitSetProducer = bitSetProducer; - this.childWeight = childWeight; - this.fieldLoader = fieldLoader; + private static List extractSuffixMaps(String[] pathElements, int index, Object currentValue) { + if (currentValue instanceof List valueList) { + List suffixMaps = new ArrayList<>(valueList.size()); + for (Object o : valueList) { + suffixMaps.addAll(extractSuffixMaps(pathElements, index, o)); } - @Override - public void setNextReader(LeafReaderContext context) { - try { - bitSet = parentBitSetProducer.getBitSet(context); - childScorer = childWeight.scorer(context); - if (childScorer != null) { - childScorer.iterator().nextDoc(); + return suffixMaps; + } else if (currentValue instanceof Map) { + @SuppressWarnings("unchecked") + Map map = (Map) currentValue; + List suffixMaps = new ArrayList<>(map.size()); + + String key = pathElements[index]; + while (index < pathElements.length) { + if (map.containsKey(key)) { + if (index + 1 == pathElements.length) { + // We found the complete path + suffixMaps.add(new SuffixMap(key, map)); + } else { + // We've matched that path partially, keep traversing to try to match it fully + suffixMaps.addAll(extractSuffixMaps(pathElements, index + 1, map.get(key))); } - dvLoader = fieldLoader.docValuesLoader(context.reader(), null); - var terms = context.reader().terms(getOffsetsFieldName(name())); - offsetsLoader = terms != null ? OffsetSourceField.loader(terms) : null; - } catch (IOException exc) { - throw new UncheckedIOException(exc); } - } - @Override - public List fetchValues(Source source, int doc, List ignoredValues) throws IOException { - if (childScorer == null || offsetsLoader == null || doc == 0) { - return List.of(); - } - int previousParent = bitSet.prevSetBit(doc - 1); - var it = childScorer.iterator(); - if (it.docID() < previousParent) { - it.advance(previousParent); + if (++index < pathElements.length) { + key += "." + pathElements[index]; } - Map> chunkMap = new LinkedHashMap<>(); - while (it.docID() < doc) { - if (dvLoader == null || dvLoader.advanceToDoc(it.docID()) == false) { - throw new IllegalStateException( - "Cannot fetch values for field [" + name() + "], missing embeddings for doc [" + doc + "]" - ); - } - var offset = offsetsLoader.advanceTo(it.docID()); - if (offset == null) { - throw new IllegalStateException( - "Cannot fetch values for field [" + name() + "], missing offsets for doc [" + doc + "]" - ); - } - var chunks = chunkMap.computeIfAbsent(offset.field(), k -> new ArrayList<>()); - chunks.add( - new SemanticTextField.Chunk( - null, - offset.start(), - offset.end(), - rawEmbeddings(fieldLoader::write, source.sourceContentType()) - ) - ); - if (it.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - } - if (chunkMap.isEmpty()) { - return List.of(); - } - return List.of( - new SemanticTextField( - indexVersionCreated, - name(), - null, - new SemanticTextField.InferenceResult(inferenceId, modelSettings, chunkMap), - source.sourceContentType() - ) - ); } - private BytesReference rawEmbeddings(CheckedConsumer writer, XContentType xContentType) - throws IOException { - try (var result = XContentFactory.contentBuilder(xContentType)) { - try (var builder = XContentFactory.contentBuilder(xContentType)) { - builder.startObject(); - writer.accept(builder); - builder.endObject(); - try ( - XContentParser parser = XContentHelper.createParserNotCompressed( - XContentParserConfiguration.EMPTY, - BytesReference.bytes(builder), - xContentType - ) - ) { - XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser); - XContentParserUtils.ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.nextToken(), parser); - parser.nextToken(); - result.copyCurrentStructure(parser); - } - return BytesReference.bytes(result); - } - } + if (suffixMaps.isEmpty()) { + // We checked for all remaining elements in the path, and they do not exist. This means we found a leaf map that we should + // add the value to. + suffixMaps.add(new SuffixMap(key, map)); } - @Override - public StoredFieldsSpec storedFieldsSpec() { - return StoredFieldsSpec.NO_REQUIREMENTS; - } + return suffixMaps; + } else { + throw new IllegalArgumentException( + "Path [" + + String.join(".", Arrays.copyOfRange(pathElements, 0, index)) + + "] has value [" + + currentValue + + "] of type [" + + currentValue.getClass().getSimpleName() + + "], which cannot be traversed into further" + ); } } @@ -831,27 +756,24 @@ private static NestedObjectMapper.Builder createChunksField( IndexSettings indexSettings ) { NestedObjectMapper.Builder chunksField = new NestedObjectMapper.Builder( - SemanticTextField.CHUNKS_FIELD, - indexSettings.getIndexVersionCreated(), + CHUNKS_FIELD, + indexVersionCreated, bitSetProducer, indexSettings ); chunksField.dynamic(ObjectMapper.Dynamic.FALSE); + KeywordFieldMapper.Builder chunkTextField = new KeywordFieldMapper.Builder(CHUNKED_TEXT_FIELD, indexVersionCreated).indexed(false) + .docValues(false); if (modelSettings != null) { - chunksField.add(createEmbeddingsField(indexSettings.getIndexVersionCreated(), modelSettings)); - } - if (InferenceMetadataFieldsMapper.isEnabled(indexVersionCreated)) { - chunksField.add(new OffsetSourceFieldMapper.Builder(CHUNKED_OFFSET_FIELD)); - } else { - var chunkTextField = new KeywordFieldMapper.Builder(TEXT_FIELD, indexVersionCreated).indexed(false).docValues(false); - chunksField.add(chunkTextField); + chunksField.add(createEmbeddingsField(indexVersionCreated, modelSettings)); } + chunksField.add(chunkTextField); return chunksField; } private static Mapper.Builder createEmbeddingsField(IndexVersion indexVersionCreated, SemanticTextField.ModelSettings modelSettings) { return switch (modelSettings.taskType()) { - case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(CHUNKED_EMBEDDINGS_FIELD).setStored(true); + case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(CHUNKED_EMBEDDINGS_FIELD); case TEXT_EMBEDDING -> { DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder( CHUNKED_EMBEDDINGS_FIELD, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextUtils.java deleted file mode 100644 index 4d3c2e8752367..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextUtils.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.mapper; - -import org.elasticsearch.ElasticsearchStatusException; -import org.elasticsearch.rest.RestStatus; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Map; - -public interface SemanticTextUtils { - /** - * This method converts the given {@code valueObj} into a list of strings. - */ - static List nodeStringValues(String field, Object valueObj) { - if (valueObj instanceof Number || valueObj instanceof Boolean) { - return List.of(valueObj.toString()); - } else if (valueObj instanceof String value) { - return List.of(value); - } else if (valueObj instanceof Collection values) { - List valuesString = new ArrayList<>(); - for (var v : values) { - if (v instanceof Number || v instanceof Boolean) { - valuesString.add(v.toString()); - } else if (v instanceof String value) { - valuesString.add(value); - } else { - throw new ElasticsearchStatusException( - "Invalid format for field [{}], expected [String|Number|Boolean] got [{}]", - RestStatus.BAD_REQUEST, - field, - valueObj.getClass().getSimpleName() - ); - } - } - return valuesString; - } - throw new ElasticsearchStatusException( - "Invalid format for field [{}], expected [String|Number|Boolean] got [{}]", - RestStatus.BAD_REQUEST, - field, - valueObj.getClass().getSimpleName() - ); - } - - /** - *

- * Insert or replace the path's value in the map with the provided new value. The map will be modified in-place. - * If the complete path does not exist in the map, it will be added to the deepest (sub-)map possible. - *

- *

- * For example, given the map: - *

- *
-     * {
-     *   "path1": {
-     *     "path2": {
-     *       "key1": "value1"
-     *     }
-     *   }
-     * }
-     * 
- *

- * And the caller wanted to insert {@code "path1.path2.path3.key2": "value2"}, the method would emit the modified map: - *

- *
-     * {
-     *   "path1": {
-     *     "path2": {
-     *       "key1": "value1",
-     *       "path3.key2": "value2"
-     *     }
-     *   }
-     * }
-     * 
- * - * @param path the value's path in the map. - * @param map the map to search and modify in-place. - * @param newValue the new value to assign to the path. - * - * @throws IllegalArgumentException If either the path cannot be fully traversed or there is ambiguity about where to insert the new - * value. - */ - static void insertValue(String path, Map map, Object newValue) { - String[] pathElements = path.split("\\."); - if (pathElements.length == 0) { - return; - } - - List suffixMaps = extractSuffixMaps(pathElements, 0, map); - if (suffixMaps.isEmpty()) { - // This should never happen. Throw in case it does for some reason. - throw new IllegalStateException("extractSuffixMaps returned an empty suffix map list"); - } else if (suffixMaps.size() == 1) { - SuffixMap suffixMap = suffixMaps.getFirst(); - suffixMap.map().put(suffixMap.suffix(), newValue); - } else { - throw new IllegalArgumentException( - "Path [" + path + "] could be inserted in " + suffixMaps.size() + " distinct ways, it is ambiguous which one to use" - ); - } - } - - record SuffixMap(String suffix, Map map) {} - - private static List extractSuffixMaps(String[] pathElements, int index, Object currentValue) { - if (currentValue instanceof List valueList) { - List suffixMaps = new ArrayList<>(valueList.size()); - for (Object o : valueList) { - suffixMaps.addAll(extractSuffixMaps(pathElements, index, o)); - } - - return suffixMaps; - } else if (currentValue instanceof Map) { - @SuppressWarnings("unchecked") - Map map = (Map) currentValue; - List suffixMaps = new ArrayList<>(map.size()); - - String key = pathElements[index]; - while (index < pathElements.length) { - if (map.containsKey(key)) { - if (index + 1 == pathElements.length) { - // We found the complete path - suffixMaps.add(new SuffixMap(key, map)); - } else { - // We've matched that path partially, keep traversing to try to match it fully - suffixMaps.addAll(extractSuffixMaps(pathElements, index + 1, map.get(key))); - } - } - - if (++index < pathElements.length) { - key += "." + pathElements[index]; - } - } - - if (suffixMaps.isEmpty()) { - // We checked for all remaining elements in the path, and they do not exist. This means we found a leaf map that we should - // add the value to. - suffixMaps.add(new SuffixMap(key, map)); - } - - return suffixMaps; - } else { - throw new IllegalArgumentException( - "Path [" - + String.join(".", Arrays.copyOfRange(pathElements, 0, index)) - + "] has value [" - + currentValue - + "] of type [" - + currentValue.getClass().getSimpleName() - + "], which cannot be traversed into further" - ); - } - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestGetInferenceModelAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestGetInferenceModelAction.java index 129154e403089..83b2a8a0f5182 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestGetInferenceModelAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestGetInferenceModelAction.java @@ -16,12 +16,9 @@ import org.elasticsearch.rest.action.RestToXContentListener; import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; -import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.Set; -import static org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_FEATURE_FLAG; import static org.elasticsearch.rest.RestRequest.Method.GET; import static org.elasticsearch.xpack.inference.rest.Paths.INFERENCE_ID; import static org.elasticsearch.xpack.inference.rest.Paths.INFERENCE_ID_PATH; @@ -31,7 +28,6 @@ @ServerlessScope(Scope.PUBLIC) public class RestGetInferenceModelAction extends BaseRestHandler { public static final String DEFAULT_ELSER_2_CAPABILITY = "default_elser_2"; - public static final String INFERENCE_METADATA_FIELDS_CAPABILITY = "inference_metadata_fields"; @Override public String getName() { @@ -70,12 +66,6 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient @Override public Set supportedCapabilities() { - Set capabilities = new HashSet<>(); - capabilities.add(DEFAULT_ELSER_2_CAPABILITY); - if (INFERENCE_METADATA_FIELDS_FEATURE_FLAG.isEnabled()) { - capabilities.add(INFERENCE_METADATA_FIELDS_CAPABILITY); - } - - return Collections.unmodifiableSet(capabilities); + return Set.of(DEFAULT_ELSER_2_CAPABILITY); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java index 73fe792664071..0b7d136ffb04c 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java @@ -16,18 +16,10 @@ import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.support.ActionFilterChain; import org.elasticsearch.action.support.WriteRequest; -import org.elasticsearch.cluster.ClusterName; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; -import org.elasticsearch.cluster.metadata.Metadata; -import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Strings; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.support.XContentMapValues; -import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.inference.ChunkedInference; import org.elasticsearch.inference.InferenceService; @@ -38,7 +30,6 @@ import org.elasticsearch.rest.RestStatus; import org.elasticsearch.tasks.Task; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.XContentType; @@ -92,21 +83,9 @@ public void tearDownThreadPool() throws Exception { terminate(threadPool); } - private IndexVersion getRandomIndexVersion() { - return randomFrom( - IndexVersionUtils.randomVersionBetween( - random(), - IndexVersions.SEMANTIC_TEXT_FIELD_TYPE, - IndexVersionUtils.getPreviousVersion(IndexVersions.INFERENCE_METADATA_FIELDS) - ), - IndexVersionUtils.randomVersionBetween(random(), IndexVersions.INFERENCE_METADATA_FIELDS, IndexVersion.current()) - ); - } - @SuppressWarnings({ "unchecked", "rawtypes" }) public void testFilterNoop() throws Exception { - IndexVersion indexVersion = getRandomIndexVersion(); - ShardBulkInferenceActionFilter filter = createFilter(threadPool, Map.of(), DEFAULT_BATCH_SIZE, indexVersion); + ShardBulkInferenceActionFilter filter = createFilter(threadPool, Map.of(), DEFAULT_BATCH_SIZE); CountDownLatch chainExecuted = new CountDownLatch(1); ActionFilterChain actionFilterChain = (task, action, request, listener) -> { try { @@ -132,12 +111,10 @@ public void testFilterNoop() throws Exception { @SuppressWarnings({ "unchecked", "rawtypes" }) public void testInferenceNotFound() throws Exception { StaticModel model = StaticModel.createRandomInstance(); - IndexVersion indexVersion = getRandomIndexVersion(); ShardBulkInferenceActionFilter filter = createFilter( threadPool, Map.of(model.getInferenceEntityId(), model), - randomIntBetween(1, 10), - indexVersion + randomIntBetween(1, 10) ); CountDownLatch chainExecuted = new CountDownLatch(1); ActionFilterChain actionFilterChain = (task, action, request, listener) -> { @@ -167,7 +144,7 @@ public void testInferenceNotFound() throws Exception { ); BulkItemRequest[] items = new BulkItemRequest[10]; for (int i = 0; i < items.length; i++) { - items[i] = randomBulkItemRequest(indexVersion, Map.of(), inferenceFieldMap)[0]; + items[i] = randomBulkItemRequest(Map.of(), inferenceFieldMap)[0]; } BulkShardRequest request = new BulkShardRequest(new ShardId("test", "test", 0), WriteRequest.RefreshPolicy.NONE, items); request.setInferenceFieldMap(inferenceFieldMap); @@ -178,14 +155,10 @@ public void testInferenceNotFound() throws Exception { @SuppressWarnings({ "unchecked", "rawtypes" }) public void testItemFailures() throws Exception { StaticModel model = StaticModel.createRandomInstance(); - IndexVersion indexVersion = getRandomIndexVersion(); - boolean useInferenceMetadataFieldsFormat = InferenceMetadataFieldsMapper.isEnabled(indexVersion); - ShardBulkInferenceActionFilter filter = createFilter( threadPool, Map.of(model.getInferenceEntityId(), model), - randomIntBetween(1, 10), - indexVersion + randomIntBetween(1, 10) ); model.putResult("I am a failure", new ChunkedInferenceError(new IllegalArgumentException("boom"))); model.putResult("I am a success", randomChunkedInferenceEmbeddingSparse(List.of("I am a success"))); @@ -205,13 +178,7 @@ public void testItemFailures() throws Exception { // item 1 is a success assertNull(bulkShardRequest.items()[1].getPrimaryResponse()); IndexRequest actualRequest = getIndexRequestOrNull(bulkShardRequest.items()[1].request()); - assertThat( - XContentMapValues.extractValue( - useInferenceMetadataFieldsFormat ? "field1" : "field1.text", - actualRequest.sourceAsMap() - ), - equalTo("I am a success") - ); + assertThat(XContentMapValues.extractValue("field1.text", actualRequest.sourceAsMap()), equalTo("I am a success")); // item 2 is a failure assertNotNull(bulkShardRequest.items()[2].getPrimaryResponse()); @@ -241,7 +208,6 @@ public void testItemFailures() throws Exception { @SuppressWarnings({ "unchecked", "rawtypes" }) public void testManyRandomDocs() throws Exception { - IndexVersion indexVersion = getRandomIndexVersion(); Map inferenceModelMap = new HashMap<>(); int numModels = randomIntBetween(1, 3); for (int i = 0; i < numModels; i++) { @@ -261,12 +227,12 @@ public void testManyRandomDocs() throws Exception { BulkItemRequest[] originalRequests = new BulkItemRequest[numRequests]; BulkItemRequest[] modifiedRequests = new BulkItemRequest[numRequests]; for (int id = 0; id < numRequests; id++) { - BulkItemRequest[] res = randomBulkItemRequest(indexVersion, inferenceModelMap, inferenceFieldMap); + BulkItemRequest[] res = randomBulkItemRequest(inferenceModelMap, inferenceFieldMap); originalRequests[id] = res[0]; modifiedRequests[id] = res[1]; } - ShardBulkInferenceActionFilter filter = createFilter(threadPool, inferenceModelMap, randomIntBetween(10, 30), indexVersion); + ShardBulkInferenceActionFilter filter = createFilter(threadPool, inferenceModelMap, randomIntBetween(10, 30)); CountDownLatch chainExecuted = new CountDownLatch(1); ActionFilterChain actionFilterChain = (task, action, request, listener) -> { try { @@ -297,12 +263,7 @@ public void testManyRandomDocs() throws Exception { } @SuppressWarnings("unchecked") - private static ShardBulkInferenceActionFilter createFilter( - ThreadPool threadPool, - Map modelMap, - int batchSize, - IndexVersion indexVersion - ) { + private static ShardBulkInferenceActionFilter createFilter(ThreadPool threadPool, Map modelMap, int batchSize) { ModelRegistry modelRegistry = mock(ModelRegistry.class); Answer unparsedModelAnswer = invocationOnMock -> { String id = (String) invocationOnMock.getArguments()[0]; @@ -358,42 +319,24 @@ private static ShardBulkInferenceActionFilter createFilter( InferenceServiceRegistry inferenceServiceRegistry = mock(InferenceServiceRegistry.class); when(inferenceServiceRegistry.getService(any())).thenReturn(Optional.of(inferenceService)); - - return new ShardBulkInferenceActionFilter(createClusterService(indexVersion), inferenceServiceRegistry, modelRegistry, batchSize); - } - - private static ClusterService createClusterService(IndexVersion indexVersion) { - IndexMetadata indexMetadata = mock(IndexMetadata.class); - when(indexMetadata.getCreationVersion()).thenReturn(indexVersion); - - Metadata metadata = mock(Metadata.class); - when(metadata.index(any(String.class))).thenReturn(indexMetadata); - - ClusterState clusterState = ClusterState.builder(new ClusterName("test")).metadata(metadata).build(); - ClusterService clusterService = mock(ClusterService.class); - when(clusterService.state()).thenReturn(clusterState); - - return clusterService; + ShardBulkInferenceActionFilter filter = new ShardBulkInferenceActionFilter(inferenceServiceRegistry, modelRegistry, batchSize); + return filter; } private static BulkItemRequest[] randomBulkItemRequest( - IndexVersion indexVersion, Map modelMap, Map fieldInferenceMap ) throws IOException { Map docMap = new LinkedHashMap<>(); Map expectedDocMap = new LinkedHashMap<>(); XContentType requestContentType = randomFrom(XContentType.values()); - boolean useInferenceMetadataFieldsFormat = InferenceMetadataFieldsMapper.isEnabled(indexVersion); - - Map inferenceMetadataFields = new HashMap<>(); for (var entry : fieldInferenceMap.values()) { String field = entry.getName(); var model = modelMap.get(entry.getInferenceId()); Object inputObject = randomSemanticTextInput(); String inputText = inputObject.toString(); docMap.put(field, inputObject); - expectedDocMap.put(field, useInferenceMetadataFieldsFormat ? inputObject : inputText); + expectedDocMap.put(field, inputText); if (model == null) { // ignore results, the doc should fail with a resource not found exception continue; @@ -406,7 +349,6 @@ private static BulkItemRequest[] randomBulkItemRequest( if (model.hasResult(inputText)) { var results = model.getResults(inputText); semanticTextField = semanticTextFieldFromChunkedInferenceResults( - indexVersion, field, model, List.of(inputText), @@ -414,19 +356,11 @@ private static BulkItemRequest[] randomBulkItemRequest( requestContentType ); } else { - Map> inputTextMap = Map.of(field, List.of(inputText)); - semanticTextField = randomSemanticText(indexVersion, field, model, List.of(inputText), requestContentType); - model.putResult(inputText, toChunkedResult(inputTextMap, semanticTextField)); + semanticTextField = randomSemanticText(field, model, List.of(inputText), requestContentType); + model.putResult(inputText, toChunkedResult(semanticTextField)); } - if (useInferenceMetadataFieldsFormat) { - inferenceMetadataFields.put(field, semanticTextField); - } else { - expectedDocMap.put(field, semanticTextField); - } - } - if (useInferenceMetadataFieldsFormat) { - expectedDocMap.put(InferenceMetadataFieldsMapper.NAME, inferenceMetadataFields); + expectedDocMap.put(field, semanticTextField); } int requestId = randomIntBetween(0, Integer.MAX_VALUE); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java index f76ceeb20b6c8..78743409ca178 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java @@ -26,7 +26,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperServiceTestCase; import org.elasticsearch.index.mapper.SourceToParse; @@ -46,7 +45,6 @@ import org.elasticsearch.search.lookup.Source; import org.elasticsearch.search.rank.RankDoc; import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; -import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder; import org.elasticsearch.xpack.core.ml.search.WeightedToken; @@ -79,12 +77,6 @@ protected Collection getPlugins() { return List.of(new InferencePlugin(Settings.EMPTY)); } - @Override - protected IndexVersion getVersion() { - // TODO: Update once highlighter supports inference metadata fields - return IndexVersionUtils.getPreviousVersion(IndexVersions.INFERENCE_METADATA_FIELDS); - } - @Override @Before public void setUp() throws Exception { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticInferenceMetadataFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticInferenceMetadataFieldMapperTests.java deleted file mode 100644 index 6504ccc4dd39f..0000000000000 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticInferenceMetadataFieldMapperTests.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.mapper; - -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.MapperServiceTestCase; -import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.xpack.inference.InferencePlugin; - -import java.util.Collection; -import java.util.Collections; - -public class SemanticInferenceMetadataFieldMapperTests extends MapperServiceTestCase { - @Override - protected Collection getPlugins() { - return Collections.singletonList(new InferencePlugin(Settings.EMPTY)); - } - - @Override - public void testFieldHasValue() { - assertTrue( - getMappedFieldType().fieldHasValue( - new FieldInfos(new FieldInfo[] { getFieldInfoWithName(SemanticInferenceMetadataFieldsMapper.NAME) }) - ) - ); - } - - @Override - public void testFieldHasValueWithEmptyFieldInfos() { - assertFalse(getMappedFieldType().fieldHasValue(FieldInfos.EMPTY)); - } - - @Override - public MappedFieldType getMappedFieldType() { - return new SemanticInferenceMetadataFieldsMapper.FieldType(); - } -} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 0092068b50c03..c6a492dfcf4e9 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -29,13 +29,10 @@ import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument; import org.elasticsearch.index.mapper.MappedFieldType; @@ -59,32 +56,36 @@ import org.elasticsearch.search.LeafNestedDocuments; import org.elasticsearch.search.NestedDocuments; import org.elasticsearch.search.SearchHit; -import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; -import org.elasticsearch.xpack.core.XPackClientPlugin; import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryWrapper; import org.elasticsearch.xpack.inference.InferencePlugin; import org.elasticsearch.xpack.inference.model.TestModel; import org.junit.AssumptionViolatedException; import java.io.IOException; +import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.function.BiConsumer; +import java.util.stream.Stream; +import static java.util.Collections.singletonList; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_TEXT_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKS_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.INFERENCE_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.INFERENCE_ID_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.MODEL_SETTINGS_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.SEARCH_INFERENCE_ID_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.TEXT_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getChunksFieldName; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getEmbeddingsFieldName; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID; @@ -96,7 +97,7 @@ public class SemanticTextFieldMapperTests extends MapperTestCase { @Override protected Collection getPlugins() { - return List.of(new InferencePlugin(Settings.EMPTY), new XPackClientPlugin()); + return singletonList(new InferencePlugin(Settings.EMPTY)); } @Override @@ -151,7 +152,15 @@ protected IngestScriptSupport ingestScriptSupport() { @Override public MappedFieldType getMappedFieldType() { - return new SemanticTextFieldMapper.SemanticTextFieldType("field", "fake-inference-id", null, null, null, getVersion(), Map.of()); + return new SemanticTextFieldMapper.SemanticTextFieldType( + "field", + "fake-inference-id", + null, + null, + null, + IndexVersion.current(), + Map.of() + ); } @Override @@ -161,18 +170,6 @@ protected void assertSearchable(MappedFieldType fieldType) { assertTrue(fieldType.isSearchable()); } - @Override - protected IndexVersion getVersion() { - return randomFrom( - IndexVersionUtils.randomVersionBetween( - random(), - IndexVersions.SEMANTIC_TEXT_FIELD_TYPE, - IndexVersionUtils.getPreviousVersion(IndexVersions.INFERENCE_METADATA_FIELDS) - ), - IndexVersionUtils.randomVersionBetween(random(), IndexVersions.INFERENCE_METADATA_FIELDS, IndexVersion.current()) - ); - } - public void testDefaults() throws Exception { final String fieldName = "field"; final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping); @@ -457,10 +454,6 @@ public void testUpdateSearchInferenceId() throws IOException { } private static void assertSemanticTextField(MapperService mapperService, String fieldName, boolean expectedModelSettings) { - final boolean useInferenceMetadataFieldsFormat = InferenceMetadataFieldsMapper.isEnabled( - mapperService.getIndexSettings().getIndexVersionCreated() - ); - Mapper mapper = mapperService.mappingLookup().getMapper(fieldName); assertNotNull(mapper); assertThat(mapper, instanceOf(SemanticTextFieldMapper.class)); @@ -478,19 +471,12 @@ private static void assertSemanticTextField(MapperService mapperService, String .get(getChunksFieldName(fieldName)); assertThat(chunksMapper, equalTo(semanticFieldMapper.fieldType().getChunksField())); assertThat(chunksMapper.fullPath(), equalTo(getChunksFieldName(fieldName))); - - Mapper textMapper = chunksMapper.getMapper(TEXT_FIELD); - if (useInferenceMetadataFieldsFormat) { - // TODO: Check for offsets mapper here - assertNull(textMapper); - } else { - assertNotNull(textMapper); - assertThat(textMapper, instanceOf(KeywordFieldMapper.class)); - KeywordFieldMapper textFieldMapper = (KeywordFieldMapper) textMapper; - assertFalse(textFieldMapper.fieldType().isIndexed()); - assertFalse(textFieldMapper.fieldType().hasDocValues()); - } - + Mapper textMapper = chunksMapper.getMapper(CHUNKED_TEXT_FIELD); + assertNotNull(textMapper); + assertThat(textMapper, instanceOf(KeywordFieldMapper.class)); + KeywordFieldMapper textFieldMapper = (KeywordFieldMapper) textMapper; + assertFalse(textFieldMapper.fieldType().isIndexed()); + assertFalse(textFieldMapper.fieldType().hasDocValues()); if (expectedModelSettings) { assertNotNull(semanticFieldMapper.fieldType().getModelSettings()); Mapper embeddingsMapper = chunksMapper.getMapper(CHUNKED_EMBEDDINGS_FIELD); @@ -553,16 +539,14 @@ public void testSuccessfulParse() throws IOException { setSearchInferenceId ? searchInferenceId : model2.getInferenceEntityId() ); - final IndexVersion indexVersion = mapperService.getIndexSettings().getIndexVersionCreated(); DocumentMapper documentMapper = mapperService.documentMapper(); ParsedDocument doc = documentMapper.parse( source( b -> addSemanticTextInferenceResults( - indexVersion, b, List.of( - randomSemanticText(indexVersion, fieldName1, model1, List.of("a b", "c"), XContentType.JSON), - randomSemanticText(indexVersion, fieldName2, model2, List.of("d e f"), XContentType.JSON) + randomSemanticText(fieldName1, model1, List.of("a b", "c"), XContentType.JSON), + randomSemanticText(fieldName2, model2, List.of("d e f"), XContentType.JSON) ) ) ) @@ -581,7 +565,11 @@ public void testSuccessfulParse() throws IOException { assertNull(luceneDocs.get(3).getParent()); withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), reader -> { - NestedDocuments nested = new NestedDocuments(mapperService.mappingLookup(), QueryBitSetProducer::new, indexVersion); + NestedDocuments nested = new NestedDocuments( + mapperService.mappingLookup(), + QueryBitSetProducer::new, + IndexVersion.current() + ); LeafNestedDocuments leaf = nested.getLeafNestedDocuments(reader.leaves().get(0)); Set visitedNestedIdentities = new HashSet<>(); @@ -604,12 +592,7 @@ public void testSuccessfulParse() throws IOException { IndexSearcher searcher = newSearcher(reader); { TopDocs topDocs = searcher.search( - generateNestedTermSparseVectorQuery( - indexVersion, - mapperService.mappingLookup().nestedLookup(), - fieldName1, - List.of("a") - ), + generateNestedTermSparseVectorQuery(mapperService.mappingLookup().nestedLookup(), fieldName1, List.of("a")), 10 ); assertEquals(1, topDocs.totalHits.value()); @@ -617,12 +600,7 @@ public void testSuccessfulParse() throws IOException { } { TopDocs topDocs = searcher.search( - generateNestedTermSparseVectorQuery( - indexVersion, - mapperService.mappingLookup().nestedLookup(), - fieldName1, - List.of("a", "b") - ), + generateNestedTermSparseVectorQuery(mapperService.mappingLookup().nestedLookup(), fieldName1, List.of("a", "b")), 10 ); assertEquals(1, topDocs.totalHits.value()); @@ -630,12 +608,7 @@ public void testSuccessfulParse() throws IOException { } { TopDocs topDocs = searcher.search( - generateNestedTermSparseVectorQuery( - indexVersion, - mapperService.mappingLookup().nestedLookup(), - fieldName2, - List.of("d") - ), + generateNestedTermSparseVectorQuery(mapperService.mappingLookup().nestedLookup(), fieldName2, List.of("d")), 10 ); assertEquals(1, topDocs.totalHits.value()); @@ -643,12 +616,7 @@ public void testSuccessfulParse() throws IOException { } { TopDocs topDocs = searcher.search( - generateNestedTermSparseVectorQuery( - indexVersion, - mapperService.mappingLookup().nestedLookup(), - fieldName2, - List.of("z") - ), + generateNestedTermSparseVectorQuery(mapperService.mappingLookup().nestedLookup(), fieldName2, List.of("z")), 10 ); assertEquals(0, topDocs.totalHits.value()); @@ -658,63 +626,52 @@ public void testSuccessfulParse() throws IOException { } public void testMissingInferenceId() throws IOException { - final MapperService mapperService = createMapperService(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); - final IndexVersion indexVersion = mapperService.getIndexSettings().getIndexVersionCreated(); - final boolean useInferenceMetadataFieldsFormat = InferenceMetadataFieldsMapper.isEnabled(indexVersion); - + DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); IllegalArgumentException ex = expectThrows( DocumentParsingException.class, IllegalArgumentException.class, - () -> mapperService.documentMapper() - .parse( - semanticTextInferenceSource( - indexVersion, - b -> b.startObject("field") - .startObject(INFERENCE_FIELD) - .field(MODEL_SETTINGS_FIELD, new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null)) - .field(CHUNKS_FIELD, useInferenceMetadataFieldsFormat ? Map.of() : List.of()) - .endObject() - .endObject() - ) + () -> documentMapper.parse( + source( + b -> b.startObject("field") + .startObject(INFERENCE_FIELD) + .field(MODEL_SETTINGS_FIELD, new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null)) + .field(CHUNKS_FIELD, List.of()) + .endObject() + .endObject() ) + ) ); assertThat(ex.getCause().getMessage(), containsString("Required [inference_id]")); } public void testMissingModelSettings() throws IOException { - MapperService mapperService = createMapperService(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); + DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); IllegalArgumentException ex = expectThrows( DocumentParsingException.class, IllegalArgumentException.class, - () -> mapperService.documentMapper() - .parse( - semanticTextInferenceSource( - mapperService.getIndexSettings().getIndexVersionCreated(), - b -> b.startObject("field").startObject(INFERENCE_FIELD).field(INFERENCE_ID_FIELD, "my_id").endObject().endObject() - ) - ) + () -> documentMapper.parse( + source(b -> b.startObject("field").startObject(INFERENCE_FIELD).field(INFERENCE_ID_FIELD, "my_id").endObject().endObject()) + ) ); assertThat(ex.getCause().getMessage(), containsString("Required [model_settings, chunks]")); } public void testMissingTaskType() throws IOException { - MapperService mapperService = createMapperService(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); + DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); IllegalArgumentException ex = expectThrows( DocumentParsingException.class, IllegalArgumentException.class, - () -> mapperService.documentMapper() - .parse( - semanticTextInferenceSource( - mapperService.getIndexSettings().getIndexVersionCreated(), - b -> b.startObject("field") - .startObject(INFERENCE_FIELD) - .field(INFERENCE_ID_FIELD, "my_id") - .startObject(MODEL_SETTINGS_FIELD) - .endObject() - .endObject() - .endObject() - ) + () -> documentMapper.parse( + source( + b -> b.startObject("field") + .startObject(INFERENCE_FIELD) + .field(INFERENCE_ID_FIELD, "my_id") + .startObject(MODEL_SETTINGS_FIELD) + .endObject() + .endObject() + .endObject() ) + ) ); assertThat(ex.getCause().getMessage(), containsString("failed to parse field [model_settings]")); } @@ -781,24 +738,15 @@ private MapperService mapperServiceForFieldWithModelSettings( MapperService.MergeReason.MAPPING_UPDATE ); - final boolean useInferenceMetadataFieldsFormat = InferenceMetadataFieldsMapper.isEnabled( - mapperService.getIndexSettings().getIndexVersionCreated() - ); - SemanticTextField semanticTextField = new SemanticTextField( - mapperService.getIndexSettings().getIndexVersionCreated(), fieldName, List.of(), - new SemanticTextField.InferenceResult(inferenceId, modelSettings, Map.of()), + new SemanticTextField.InferenceResult(inferenceId, modelSettings, List.of()), XContentType.JSON ); XContentBuilder builder = JsonXContent.contentBuilder().startObject(); - if (useInferenceMetadataFieldsFormat) { - builder.field(InferenceMetadataFieldsMapper.NAME, Map.of(semanticTextField.fieldName(), semanticTextField)); - } else { - builder.field(semanticTextField.fieldName()); - builder.value(semanticTextField); - } + builder.field(semanticTextField.fieldName()); + builder.value(semanticTextField); builder.endObject(); SourceToParse sourceToParse = new SourceToParse("test", BytesReference.bytes(builder), XContentType.JSON); @@ -850,6 +798,266 @@ public void testExistsQueryDenseVector() throws IOException { assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class)); } + public void testInsertValueMapTraversal() throws IOException { + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("test", "value").endObject(); + + Map map = toSourceMap(Strings.toString(builder)); + SemanticTextFieldMapper.insertValue("test", map, "value2"); + assertThat(getMapValue(map, "test"), equalTo("value2")); + SemanticTextFieldMapper.insertValue("something.else", map, "something_else_value"); + assertThat(getMapValue(map, "something\\.else"), equalTo("something_else_value")); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + builder.startObject("path1").startObject("path2").field("test", "value").endObject().endObject(); + builder.endObject(); + + Map map = toSourceMap(Strings.toString(builder)); + SemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); + assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); + SemanticTextFieldMapper.insertValue("path1.path2.test_me", map, "test_me_value"); + assertThat(getMapValue(map, "path1.path2.test_me"), equalTo("test_me_value")); + SemanticTextFieldMapper.insertValue("path1.non_path2.test", map, "test_value"); + assertThat(getMapValue(map, "path1.non_path2\\.test"), equalTo("test_value")); + + SemanticTextFieldMapper.insertValue("path1.path2", map, Map.of("path3", "bar")); + assertThat(getMapValue(map, "path1.path2"), equalTo(Map.of("path3", "bar"))); + + SemanticTextFieldMapper.insertValue("path1", map, "baz"); + assertThat(getMapValue(map, "path1"), equalTo("baz")); + + SemanticTextFieldMapper.insertValue("path3.path4", map, Map.of("test", "foo")); + assertThat(getMapValue(map, "path3\\.path4"), equalTo(Map.of("test", "foo"))); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + builder.startObject("path1").array("test", "value1", "value2").endObject(); + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + SemanticTextFieldMapper.insertValue("path1.test", map, List.of("value3", "value4", "value5")); + assertThat(getMapValue(map, "path1.test"), equalTo(List.of("value3", "value4", "value5"))); + + SemanticTextFieldMapper.insertValue("path2.test", map, List.of("value6", "value7", "value8")); + assertThat(getMapValue(map, "path2\\.test"), equalTo(List.of("value6", "value7", "value8"))); + } + } + + public void testInsertValueListTraversal() throws IOException { + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1"); + { + builder.startArray("path2"); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + } + builder.endObject(); + } + { + builder.startObject("path3"); + { + builder.startArray("path4"); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + SemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); + assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); + SemanticTextFieldMapper.insertValue("path1.path2.test2", map, "value3"); + assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); + assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(Map.of("test", "value2", "test2", "value3")))); + + SemanticTextFieldMapper.insertValue("path3.path4.test", map, "value4"); + assertThat(getMapValue(map, "path3.path4.test"), equalTo("value4")); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1"); + { + builder.startArray("path2"); + builder.startArray(); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + SemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); + assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); + SemanticTextFieldMapper.insertValue("path1.path2.test2", map, "value3"); + assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); + assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(List.of(Map.of("test", "value2", "test2", "value3"))))); + } + } + + public void testInsertValueFieldsWithDots() throws IOException { + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("xxx.yyy", "value1").endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + SemanticTextFieldMapper.insertValue("xxx.yyy", map, "value2"); + assertThat(getMapValue(map, "xxx\\.yyy"), equalTo("value2")); + + SemanticTextFieldMapper.insertValue("xxx", map, "value3"); + assertThat(getMapValue(map, "xxx"), equalTo("value3")); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1.path2"); + { + builder.startObject("path3.path4"); + builder.field("test", "value1"); + builder.endObject(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + SemanticTextFieldMapper.insertValue("path1.path2.path3.path4.test", map, "value2"); + assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test"), equalTo("value2")); + + SemanticTextFieldMapper.insertValue("path1.path2.path3.path4.test2", map, "value3"); + assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test2"), equalTo("value3")); + assertThat(getMapValue(map, "path1\\.path2.path3\\.path4"), equalTo(Map.of("test", "value2", "test2", "value3"))); + } + } + + public void testInsertValueAmbiguousPath() throws IOException { + // Mixed dotted object notation + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1.path2"); + { + builder.startObject("path3"); + builder.field("test1", "value1"); + builder.endObject(); + } + builder.endObject(); + } + { + builder.startObject("path1"); + { + builder.startObject("path2.path3"); + builder.field("test2", "value2"); + builder.endObject(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); + + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test1", map, "value3") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + ex = assertThrows( + IllegalArgumentException.class, + () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test3", map, "value4") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + assertThat(map, equalTo(originalMap)); + } + + // traversal through lists + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1.path2"); + { + builder.startArray("path3"); + builder.startObject().field("test1", "value1").endObject(); + builder.endArray(); + } + builder.endObject(); + } + { + builder.startObject("path1"); + { + builder.startArray("path2.path3"); + builder.startObject().field("test2", "value2").endObject(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); + + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test1", map, "value3") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + ex = assertThrows( + IllegalArgumentException.class, + () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test3", map, "value4") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + assertThat(map, equalTo(originalMap)); + } + } + + public void testInsertValueCannotTraversePath() throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1"); + { + builder.startArray("path2"); + builder.startArray(); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); + + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> SemanticTextFieldMapper.insertValue("path1.path2.test.test2", map, "value2") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.test] has value [value1] of type [String], which cannot be traversed into further") + ); + + assertThat(map, equalTo(originalMap)); + } + @Override protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { // Until a doc is indexed, the query is rewritten as match no docs @@ -871,25 +1079,11 @@ private static void addSemanticTextMapping( mappingBuilder.endObject(); } - private static void addSemanticTextInferenceResults( - IndexVersion indexVersion, - XContentBuilder sourceBuilder, - List semanticTextInferenceResults - ) throws IOException { - final boolean useInferenceMetadataFieldsFormat = InferenceMetadataFieldsMapper.isEnabled(indexVersion); - - if (useInferenceMetadataFieldsFormat) { - // Use a linked hash map to maintain insertion-order iteration over the inference fields - Map inferenceMetadataFields = new LinkedHashMap<>(); - for (var field : semanticTextInferenceResults) { - inferenceMetadataFields.put(field.fieldName(), field); - } - sourceBuilder.field(InferenceMetadataFieldsMapper.NAME, inferenceMetadataFields); - } else { - for (var field : semanticTextInferenceResults) { - sourceBuilder.field(field.fieldName()); - sourceBuilder.value(field); - } + private static void addSemanticTextInferenceResults(XContentBuilder sourceBuilder, List semanticTextInferenceResults) + throws IOException { + for (var field : semanticTextInferenceResults) { + sourceBuilder.field(field.fieldName()); + sourceBuilder.value(field); } } @@ -904,16 +1098,11 @@ static String randomFieldName(int numLevel) { return builder.toString(); } - private static Query generateNestedTermSparseVectorQuery( - IndexVersion indexVersion, - NestedLookup nestedLookup, - String fieldName, - List tokens - ) { + private static Query generateNestedTermSparseVectorQuery(NestedLookup nestedLookup, String fieldName, List tokens) { NestedObjectMapper mapper = nestedLookup.getNestedMappers().get(getChunksFieldName(fieldName)); assertNotNull(mapper); - BitSetProducer parentFilter = new QueryBitSetProducer(Queries.newNonNestedFilter(indexVersion)); + BitSetProducer parentFilter = new QueryBitSetProducer(Queries.newNonNestedFilter(IndexVersion.current())); BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); for (String token : tokens) { queryBuilder.add( @@ -930,21 +1119,6 @@ private static Query generateNestedTermSparseVectorQuery( ); } - private static SourceToParse semanticTextInferenceSource(IndexVersion indexVersion, CheckedConsumer build) - throws IOException { - final boolean useInferenceMetadataFieldsFormat = InferenceMetadataFieldsMapper.isEnabled(indexVersion); - - return source(b -> { - if (useInferenceMetadataFieldsFormat) { - b.startObject(InferenceMetadataFieldsMapper.NAME); - } - build.accept(b); - if (useInferenceMetadataFieldsFormat) { - b.endObject(); - } - }); - } - private static void assertChildLeafNestedDocument( LeafNestedDocuments leaf, int advanceToDoc, @@ -969,4 +1143,68 @@ private static void assertSparseFeatures(LuceneDocument doc, String fieldName, i } assertThat(count, equalTo(expectedCount)); } + + private Map toSourceMap(String source) throws IOException { + try (XContentParser parser = createParser(JsonXContent.jsonXContent, source)) { + return parser.map(); + } + } + + private static Object getMapValue(Map map, String key) { + // Split the path on unescaped "." chars and then unescape the escaped "." chars + final String[] pathElements = Arrays.stream(key.split("(? k.replace("\\.", ".")).toArray(String[]::new); + + Object value = null; + Object nextLayer = map; + for (int i = 0; i < pathElements.length; i++) { + if (nextLayer instanceof Map nextMap) { + value = nextMap.get(pathElements[i]); + } else if (nextLayer instanceof List nextList) { + final String pathElement = pathElements[i]; + List values = nextList.stream().flatMap(v -> { + Stream.Builder streamBuilder = Stream.builder(); + if (v instanceof List innerList) { + traverseList(innerList, streamBuilder); + } else { + streamBuilder.add(v); + } + return streamBuilder.build(); + }).filter(v -> v instanceof Map).map(v -> ((Map) v).get(pathElement)).filter(Objects::nonNull).toList(); + + if (values.isEmpty()) { + return null; + } else if (values.size() > 1) { + throw new AssertionError("List " + nextList + " contains multiple values for [" + pathElement + "]"); + } else { + value = values.getFirst(); + } + } else if (nextLayer == null) { + break; + } else { + throw new AssertionError( + "Path [" + + String.join(".", Arrays.copyOfRange(pathElements, 0, i)) + + "] has value [" + + value + + "] of type [" + + value.getClass().getSimpleName() + + "], which cannot be traversed into further" + ); + } + + nextLayer = value; + } + + return value; + } + + private static void traverseList(List list, Stream.Builder streamBuilder) { + for (Object value : list) { + if (value instanceof List innerList) { + traverseList(innerList, streamBuilder); + } else { + streamBuilder.add(value); + } + } + } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java index 753906f0b2fce..dcdd9b3d42341 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java @@ -9,16 +9,13 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; -import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; +import org.elasticsearch.core.Tuple; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.ChunkedInference; import org.elasticsearch.inference.Model; import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.inference.TaskType; import org.elasticsearch.test.AbstractXContentTestCase; -import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; @@ -30,22 +27,18 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; -import java.util.ListIterator; import java.util.Map; import java.util.function.Predicate; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.toSemanticTextFieldChunk; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.toSemanticTextFieldChunks; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; public class SemanticTextFieldTests extends AbstractXContentTestCase { private static final String NAME = "field"; - private IndexVersion currentIndexVersion; - @Override protected Predicate getRandomFieldsExcludeFilter() { return n -> n.endsWith(CHUNKED_EMBEDDINGS_FIELD); @@ -53,64 +46,48 @@ protected Predicate getRandomFieldsExcludeFilter() { @Override protected void assertEqualInstances(SemanticTextField expectedInstance, SemanticTextField newInstance) { - assertThat(newInstance.indexCreatedVersion(), equalTo(newInstance.indexCreatedVersion())); assertThat(newInstance.fieldName(), equalTo(expectedInstance.fieldName())); assertThat(newInstance.originalValues(), equalTo(expectedInstance.originalValues())); assertThat(newInstance.inference().modelSettings(), equalTo(expectedInstance.inference().modelSettings())); assertThat(newInstance.inference().chunks().size(), equalTo(expectedInstance.inference().chunks().size())); SemanticTextField.ModelSettings modelSettings = newInstance.inference().modelSettings(); - for (var entry : newInstance.inference().chunks().entrySet()) { - var expectedChunks = expectedInstance.inference().chunks().get(entry.getKey()); - assertNotNull(expectedChunks); - assertThat(entry.getValue().size(), equalTo(expectedChunks.size())); - for (int i = 0; i < entry.getValue().size(); i++) { - var actualChunk = entry.getValue().get(i); - assertThat(actualChunk.text(), equalTo(expectedChunks.get(i).text())); - assertThat(actualChunk.startOffset(), equalTo(expectedChunks.get(i).startOffset())); - assertThat(actualChunk.endOffset(), equalTo(expectedChunks.get(i).endOffset())); - switch (modelSettings.taskType()) { - case TEXT_EMBEDDING -> { - double[] expectedVector = parseDenseVector( - expectedChunks.get(i).rawEmbeddings(), - modelSettings.dimensions(), - expectedInstance.contentType() - ); - double[] newVector = parseDenseVector( - actualChunk.rawEmbeddings(), - modelSettings.dimensions(), - newInstance.contentType() - ); - assertArrayEquals(expectedVector, newVector, 0.0000001f); - } - case SPARSE_EMBEDDING -> { - List expectedTokens = parseWeightedTokens( - expectedChunks.get(i).rawEmbeddings(), - expectedInstance.contentType() - ); - List newTokens = parseWeightedTokens(actualChunk.rawEmbeddings(), newInstance.contentType()); - assertThat(newTokens, equalTo(expectedTokens)); - } - default -> throw new AssertionError("Invalid task type " + modelSettings.taskType()); + for (int i = 0; i < newInstance.inference().chunks().size(); i++) { + assertThat(newInstance.inference().chunks().get(i).text(), equalTo(expectedInstance.inference().chunks().get(i).text())); + switch (modelSettings.taskType()) { + case TEXT_EMBEDDING -> { + double[] expectedVector = parseDenseVector( + expectedInstance.inference().chunks().get(i).rawEmbeddings(), + modelSettings.dimensions(), + expectedInstance.contentType() + ); + double[] newVector = parseDenseVector( + newInstance.inference().chunks().get(i).rawEmbeddings(), + modelSettings.dimensions(), + newInstance.contentType() + ); + assertArrayEquals(expectedVector, newVector, 0.0000001f); + } + case SPARSE_EMBEDDING -> { + List expectedTokens = parseWeightedTokens( + expectedInstance.inference().chunks().get(i).rawEmbeddings(), + expectedInstance.contentType() + ); + List newTokens = parseWeightedTokens( + newInstance.inference().chunks().get(i).rawEmbeddings(), + newInstance.contentType() + ); + assertThat(newTokens, equalTo(expectedTokens)); } + default -> throw new AssertionError("Invalid task type " + modelSettings.taskType()); } } } @Override protected SemanticTextField createTestInstance() { - currentIndexVersion = randomFrom( - IndexVersionUtils.randomPreviousCompatibleVersion(random(), IndexVersions.INFERENCE_METADATA_FIELDS), - IndexVersionUtils.randomVersionBetween(random(), IndexVersions.INFERENCE_METADATA_FIELDS, IndexVersion.current()) - ); List rawValues = randomList(1, 5, () -> randomSemanticTextInput().toString()); try { // try catch required for override - return randomSemanticText( - currentIndexVersion, - NAME, - TestModel.createRandomInstance(), - rawValues, - randomFrom(XContentType.values()) - ); + return randomSemanticText(NAME, TestModel.createRandomInstance(), rawValues, randomFrom(XContentType.values())); } catch (IOException e) { fail("Failed to create random SemanticTextField instance"); } @@ -119,12 +96,12 @@ protected SemanticTextField createTestInstance() { @Override protected SemanticTextField doParseInstance(XContentParser parser) throws IOException { - return SemanticTextField.parse(parser, new SemanticTextField.ParserContext(currentIndexVersion, NAME, parser.contentType())); + return SemanticTextField.parse(parser, new Tuple<>(NAME, parser.contentType())); } @Override protected boolean supportsUnknownFields() { - return false; + return true; } public void testModelSettingsValidation() { @@ -208,61 +185,30 @@ public static ChunkedInferenceEmbeddingSparse randomChunkedInferenceEmbeddingSpa return new ChunkedInferenceEmbeddingSparse(chunks); } - public static SemanticTextField randomSemanticText( - IndexVersion indexVersion, - String fieldName, - Model model, - List inputs, - XContentType contentType - ) throws IOException { + public static SemanticTextField randomSemanticText(String fieldName, Model model, List inputs, XContentType contentType) + throws IOException { ChunkedInference results = switch (model.getTaskType()) { case TEXT_EMBEDDING -> randomChunkedInferenceEmbeddingFloat(model, inputs); case SPARSE_EMBEDDING -> randomChunkedInferenceEmbeddingSparse(inputs); default -> throw new AssertionError("invalid task type: " + model.getTaskType().name()); }; - return semanticTextFieldFromChunkedInferenceResults(indexVersion, fieldName, model, inputs, results, contentType); + return semanticTextFieldFromChunkedInferenceResults(fieldName, model, inputs, results, contentType); } public static SemanticTextField semanticTextFieldFromChunkedInferenceResults( - IndexVersion indexVersion, String fieldName, Model model, List inputs, ChunkedInference results, XContentType contentType ) throws IOException { - final boolean useInferenceMetadataFields = InferenceMetadataFieldsMapper.isEnabled(indexVersion); - - // In this test framework, we don't perform "real" chunking; each input generates one chunk. Thus, we can assume there is a - // one-to-one relationship between inputs and chunks. Iterate over the inputs and chunks to match each input with its - // corresponding chunk. - final List chunks = new ArrayList<>(inputs.size()); - int offsetAdjustment = 0; - Iterator inputsIt = inputs.iterator(); - Iterator chunkIt = results.chunksAsMatchedTextAndByteReference(contentType.xContent()); - while (inputsIt.hasNext() && chunkIt.hasNext()) { - String input = inputsIt.next(); - var chunk = chunkIt.next(); - chunks.add(toSemanticTextFieldChunk(input, offsetAdjustment, chunk, useInferenceMetadataFields)); - - // When using the inference metadata fields format, all the input values are concatenated so that the - // chunk text offsets are expressed in the context of a single string. Calculate the offset adjustment - // to apply to account for this. - offsetAdjustment = input.length() + 1; // Add one for separator char length - } - - if (inputsIt.hasNext() || chunkIt.hasNext()) { - throw new IllegalArgumentException("Input list size and chunk count do not match"); - } - return new SemanticTextField( - indexVersion, fieldName, - useInferenceMetadataFields ? null : inputs, + inputs, new SemanticTextField.InferenceResult( model.getInferenceEntityId(), new SemanticTextField.ModelSettings(model), - Map.of(fieldName, chunks) + toSemanticTextFieldChunks(List.of(results), contentType) ), contentType ); @@ -286,51 +232,37 @@ public static Object randomSemanticTextInput() { } } - public static ChunkedInference toChunkedResult(Map> matchedTextMap, SemanticTextField field) throws IOException { + public static ChunkedInference toChunkedResult(SemanticTextField field) throws IOException { switch (field.inference().modelSettings().taskType()) { case SPARSE_EMBEDDING -> { List chunks = new ArrayList<>(); - for (var entry : field.inference().chunks().entrySet()) { - String entryField = entry.getKey(); - List entryChunks = entry.getValue(); - List entryFieldMatchedText = validateAndGetMatchedTextForField(matchedTextMap, entryField, entryChunks.size()); - - ListIterator matchedTextIt = entryFieldMatchedText.listIterator(); - for (var chunk : entryChunks) { - var tokens = parseWeightedTokens(chunk.rawEmbeddings(), field.contentType()); - chunks.add( - new ChunkedInferenceEmbeddingSparse.SparseEmbeddingChunk( - tokens, - matchedTextIt.next(), - new ChunkedInference.TextOffset(chunk.startOffset(), chunk.endOffset()) - ) - ); - } + for (var chunk : field.inference().chunks()) { + var tokens = parseWeightedTokens(chunk.rawEmbeddings(), field.contentType()); + chunks.add( + new ChunkedInferenceEmbeddingSparse.SparseEmbeddingChunk( + tokens, + chunk.text(), + new ChunkedInference.TextOffset(0, chunk.text().length()) + ) + ); } return new ChunkedInferenceEmbeddingSparse(chunks); } case TEXT_EMBEDDING -> { List chunks = new ArrayList<>(); - for (var entry : field.inference().chunks().entrySet()) { - String entryField = entry.getKey(); - List entryChunks = entry.getValue(); - List entryFieldMatchedText = validateAndGetMatchedTextForField(matchedTextMap, entryField, entryChunks.size()); - - ListIterator matchedTextIt = entryFieldMatchedText.listIterator(); - for (var chunk : entryChunks) { - double[] values = parseDenseVector( - chunk.rawEmbeddings(), - field.inference().modelSettings().dimensions(), - field.contentType() - ); - chunks.add( - new ChunkedInferenceEmbeddingFloat.FloatEmbeddingChunk( - FloatConversionUtils.floatArrayOf(values), - matchedTextIt.next(), - new ChunkedInference.TextOffset(chunk.startOffset(), chunk.endOffset()) - ) - ); - } + for (var chunk : field.inference().chunks()) { + double[] values = parseDenseVector( + chunk.rawEmbeddings(), + field.inference().modelSettings().dimensions(), + field.contentType() + ); + chunks.add( + new ChunkedInferenceEmbeddingFloat.FloatEmbeddingChunk( + FloatConversionUtils.floatArrayOf(values), + chunk.text(), + new ChunkedInference.TextOffset(0, chunk.text().length()) + ) + ); } return new ChunkedInferenceEmbeddingFloat(chunks); } @@ -338,21 +270,6 @@ public static ChunkedInference toChunkedResult(Map> matched } } - private static List validateAndGetMatchedTextForField( - Map> matchedTextMap, - String fieldName, - int chunkCount - ) { - List fieldMatchedText = matchedTextMap.get(fieldName); - if (fieldMatchedText == null) { - throw new IllegalStateException("No matched text list exists for field [" + fieldName + "]"); - } else if (fieldMatchedText.size() != chunkCount) { - throw new IllegalStateException("Matched text list size does not equal chunk count for field [" + fieldName + "]"); - } - - return fieldMatchedText; - } - private static double[] parseDenseVector(BytesReference value, int numDims, XContentType contentType) { try (XContentParser parser = XContentHelper.createParserNotCompressed(XContentParserConfiguration.EMPTY, value, contentType)) { parser.nextToken(); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextUtilsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextUtilsTests.java deleted file mode 100644 index e334335d6c78d..0000000000000 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextUtilsTests.java +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.mapper; - -import org.elasticsearch.common.Strings; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentParser; -import org.elasticsearch.xcontent.json.JsonXContent; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.stream.Stream; - -import static org.hamcrest.Matchers.equalTo; - -public class SemanticTextUtilsTests extends ESTestCase { - public void testInsertValueMapTraversal() throws IOException { - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("test", "value").endObject(); - - Map map = toSourceMap(Strings.toString(builder)); - SemanticTextUtils.insertValue("test", map, "value2"); - assertThat(getMapValue(map, "test"), equalTo("value2")); - SemanticTextUtils.insertValue("something.else", map, "something_else_value"); - assertThat(getMapValue(map, "something\\.else"), equalTo("something_else_value")); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - builder.startObject("path1").startObject("path2").field("test", "value").endObject().endObject(); - builder.endObject(); - - Map map = toSourceMap(Strings.toString(builder)); - SemanticTextUtils.insertValue("path1.path2.test", map, "value2"); - assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); - SemanticTextUtils.insertValue("path1.path2.test_me", map, "test_me_value"); - assertThat(getMapValue(map, "path1.path2.test_me"), equalTo("test_me_value")); - SemanticTextUtils.insertValue("path1.non_path2.test", map, "test_value"); - assertThat(getMapValue(map, "path1.non_path2\\.test"), equalTo("test_value")); - - SemanticTextUtils.insertValue("path1.path2", map, Map.of("path3", "bar")); - assertThat(getMapValue(map, "path1.path2"), equalTo(Map.of("path3", "bar"))); - - SemanticTextUtils.insertValue("path1", map, "baz"); - assertThat(getMapValue(map, "path1"), equalTo("baz")); - - SemanticTextUtils.insertValue("path3.path4", map, Map.of("test", "foo")); - assertThat(getMapValue(map, "path3\\.path4"), equalTo(Map.of("test", "foo"))); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - builder.startObject("path1").array("test", "value1", "value2").endObject(); - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextUtils.insertValue("path1.test", map, List.of("value3", "value4", "value5")); - assertThat(getMapValue(map, "path1.test"), equalTo(List.of("value3", "value4", "value5"))); - - SemanticTextUtils.insertValue("path2.test", map, List.of("value6", "value7", "value8")); - assertThat(getMapValue(map, "path2\\.test"), equalTo(List.of("value6", "value7", "value8"))); - } - } - - public void testInsertValueListTraversal() throws IOException { - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1"); - { - builder.startArray("path2"); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - } - builder.endObject(); - } - { - builder.startObject("path3"); - { - builder.startArray("path4"); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextUtils.insertValue("path1.path2.test", map, "value2"); - assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); - SemanticTextUtils.insertValue("path1.path2.test2", map, "value3"); - assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); - assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(Map.of("test", "value2", "test2", "value3")))); - - SemanticTextUtils.insertValue("path3.path4.test", map, "value4"); - assertThat(getMapValue(map, "path3.path4.test"), equalTo("value4")); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1"); - { - builder.startArray("path2"); - builder.startArray(); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextUtils.insertValue("path1.path2.test", map, "value2"); - assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); - SemanticTextUtils.insertValue("path1.path2.test2", map, "value3"); - assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); - assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(List.of(Map.of("test", "value2", "test2", "value3"))))); - } - } - - public void testInsertValueFieldsWithDots() throws IOException { - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("xxx.yyy", "value1").endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextUtils.insertValue("xxx.yyy", map, "value2"); - assertThat(getMapValue(map, "xxx\\.yyy"), equalTo("value2")); - - SemanticTextUtils.insertValue("xxx", map, "value3"); - assertThat(getMapValue(map, "xxx"), equalTo("value3")); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1.path2"); - { - builder.startObject("path3.path4"); - builder.field("test", "value1"); - builder.endObject(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextUtils.insertValue("path1.path2.path3.path4.test", map, "value2"); - assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test"), equalTo("value2")); - - SemanticTextUtils.insertValue("path1.path2.path3.path4.test2", map, "value3"); - assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test2"), equalTo("value3")); - assertThat(getMapValue(map, "path1\\.path2.path3\\.path4"), equalTo(Map.of("test", "value2", "test2", "value3"))); - } - } - - public void testInsertValueAmbiguousPath() throws IOException { - // Mixed dotted object notation - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1.path2"); - { - builder.startObject("path3"); - builder.field("test1", "value1"); - builder.endObject(); - } - builder.endObject(); - } - { - builder.startObject("path1"); - { - builder.startObject("path2.path3"); - builder.field("test2", "value2"); - builder.endObject(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); - - IllegalArgumentException ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextUtils.insertValue("path1.path2.path3.test1", map, "value3") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextUtils.insertValue("path1.path2.path3.test3", map, "value4") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - assertThat(map, equalTo(originalMap)); - } - - // traversal through lists - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1.path2"); - { - builder.startArray("path3"); - builder.startObject().field("test1", "value1").endObject(); - builder.endArray(); - } - builder.endObject(); - } - { - builder.startObject("path1"); - { - builder.startArray("path2.path3"); - builder.startObject().field("test2", "value2").endObject(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); - - IllegalArgumentException ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextUtils.insertValue("path1.path2.path3.test1", map, "value3") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextUtils.insertValue("path1.path2.path3.test3", map, "value4") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - assertThat(map, equalTo(originalMap)); - } - } - - public void testInsertValueCannotTraversePath() throws IOException { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1"); - { - builder.startArray("path2"); - builder.startArray(); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); - - IllegalArgumentException ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextUtils.insertValue("path1.path2.test.test2", map, "value2") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.test] has value [value1] of type [String], which cannot be traversed into further") - ); - - assertThat(map, equalTo(originalMap)); - } - - private Map toSourceMap(String source) throws IOException { - try (XContentParser parser = createParser(JsonXContent.jsonXContent, source)) { - return parser.map(); - } - } - - private static Object getMapValue(Map map, String key) { - // Split the path on unescaped "." chars and then unescape the escaped "." chars - final String[] pathElements = Arrays.stream(key.split("(? k.replace("\\.", ".")).toArray(String[]::new); - - Object value = null; - Object nextLayer = map; - for (int i = 0; i < pathElements.length; i++) { - if (nextLayer instanceof Map nextMap) { - value = nextMap.get(pathElements[i]); - } else if (nextLayer instanceof List nextList) { - final String pathElement = pathElements[i]; - List values = nextList.stream().flatMap(v -> { - Stream.Builder streamBuilder = Stream.builder(); - if (v instanceof List innerList) { - traverseList(innerList, streamBuilder); - } else { - streamBuilder.add(v); - } - return streamBuilder.build(); - }).filter(v -> v instanceof Map).map(v -> ((Map) v).get(pathElement)).filter(Objects::nonNull).toList(); - - if (values.isEmpty()) { - return null; - } else if (values.size() > 1) { - throw new AssertionError("List " + nextList + " contains multiple values for [" + pathElement + "]"); - } else { - value = values.getFirst(); - } - } else if (nextLayer == null) { - break; - } else { - throw new AssertionError( - "Path [" - + String.join(".", Arrays.copyOfRange(pathElements, 0, i)) - + "] has value [" - + value - + "] of type [" - + value.getClass().getSimpleName() - + "], which cannot be traversed into further" - ); - } - - nextLayer = value; - } - - return value; - } - - private static void traverseList(List list, Stream.Builder streamBuilder) { - for (Object value : list) { - if (value instanceof List innerList) { - traverseList(innerList, streamBuilder); - } else { - streamBuilder.add(value); - } - } - } -} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index 1a66d5eb57317..36aa2200eceae 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -27,8 +27,6 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; -import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceToParse; @@ -67,11 +65,11 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.Map; import static org.apache.lucene.search.BooleanClause.Occur.FILTER; import static org.apache.lucene.search.BooleanClause.Occur.MUST; import static org.apache.lucene.search.BooleanClause.Occur.SHOULD; +import static org.elasticsearch.index.IndexVersions.NEW_SPARSE_VECTOR; import static org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig.DEFAULT_RESULTS_FIELD; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -123,15 +121,12 @@ protected Collection> getPlugins() { @Override protected Settings createTestIndexSettings() { - IndexVersion indexVersion = randomFrom( - IndexVersionUtils.randomVersionBetween( - random(), - IndexVersions.SEMANTIC_TEXT_FIELD_TYPE, - IndexVersionUtils.getPreviousVersion(IndexVersions.INFERENCE_METADATA_FIELDS) - ), - IndexVersionUtils.randomVersionBetween(random(), IndexVersions.INFERENCE_METADATA_FIELDS, IndexVersion.current()) - ); - return Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, indexVersion).build(); + // Randomize index version within compatible range + // we have to prefer CURRENT since with the range of versions we support it's rather unlikely to get the current actually. + IndexVersion indexVersionCreated = randomBoolean() + ? IndexVersion.current() + : IndexVersionUtils.randomVersionBetween(random(), NEW_SPARSE_VECTOR, IndexVersion.current()); + return Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, indexVersionCreated).build(); } @Override @@ -153,11 +148,7 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws private void applyRandomInferenceResults(MapperService mapperService) throws IOException { // Parse random inference results (or no inference results) to set up the dynamic inference result mappings under the semantic text // field - SourceToParse sourceToParse = buildSemanticTextFieldWithInferenceResults( - inferenceResultType, - denseVectorElementType, - mapperService.getIndexSettings().getIndexVersionCreated() - ); + SourceToParse sourceToParse = buildSemanticTextFieldWithInferenceResults(inferenceResultType, denseVectorElementType); if (sourceToParse != null) { ParsedDocument parsedDocument = mapperService.documentMapper().parse(sourceToParse); mapperService.merge( @@ -207,7 +198,7 @@ private void assertSparseEmbeddingLuceneQuery(Query query) { Query innerQuery = assertOuterBooleanQuery(query); assertThat(innerQuery, instanceOf(SparseVectorQueryWrapper.class)); var sparseQuery = (SparseVectorQueryWrapper) innerQuery; - assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + assertThat(((SparseVectorQueryWrapper) innerQuery).getTermsQuery(), instanceOf(BooleanQuery.class)); BooleanQuery innerBooleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); assertThat(innerBooleanQuery.clauses().size(), equalTo(queryTokenCount)); @@ -343,11 +334,8 @@ public void testSerializingQueryWhenNoInferenceId() throws IOException { private static SourceToParse buildSemanticTextFieldWithInferenceResults( InferenceResultType inferenceResultType, - DenseVectorFieldMapper.ElementType denseVectorElementType, - IndexVersion indexVersion + DenseVectorFieldMapper.ElementType denseVectorElementType ) throws IOException { - final boolean useInferenceMetadataFields = InferenceMetadataFieldsMapper.isEnabled(indexVersion); - SemanticTextField.ModelSettings modelSettings = switch (inferenceResultType) { case NONE -> null; case SPARSE_EMBEDDING -> new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null); @@ -362,21 +350,15 @@ private static SourceToParse buildSemanticTextFieldWithInferenceResults( SourceToParse sourceToParse = null; if (modelSettings != null) { SemanticTextField semanticTextField = new SemanticTextField( - indexVersion, SEMANTIC_TEXT_FIELD, - null, - new SemanticTextField.InferenceResult(INFERENCE_ID, modelSettings, Map.of(SEMANTIC_TEXT_FIELD, List.of())), + List.of(), + new SemanticTextField.InferenceResult(INFERENCE_ID, modelSettings, List.of()), XContentType.JSON ); XContentBuilder builder = JsonXContent.contentBuilder().startObject(); - if (useInferenceMetadataFields) { - builder.startObject(InferenceMetadataFieldsMapper.NAME); - } - builder.field(semanticTextField.fieldName(), semanticTextField); - if (useInferenceMetadataFields) { - builder.endObject(); - } + builder.field(semanticTextField.fieldName()); + builder.value(semanticTextField); builder.endObject(); sourceToParse = new SourceToParse("test", BytesReference.bytes(builder), XContentType.JSON); } diff --git a/x-pack/plugin/inference/src/test/resources/org/elasticsearch/xpack/inference/highlight/sample-doc.json b/x-pack/plugin/inference/src/test/resources/org/elasticsearch/xpack/inference/highlight/sample-doc.json deleted file mode 100644 index 2ae09697d55af..0000000000000 --- a/x-pack/plugin/inference/src/test/resources/org/elasticsearch/xpack/inference/highlight/sample-doc.json +++ /dev/null @@ -1,4310 +0,0 @@ -{ - "_inference_fields": { - "dense_field": { - "inference": { - "inference_id": ".multilingual-e5-small-elasticsearch", - "model_settings": { - "task_type": "text_embedding", - "dimensions": 384, - "similarity": "cosine", - "element_type": "float" - }, - "chunks": { - "field": [ - { - "start_offset": 0, - "end_offset": 1329, - "embeddings": [ - 0.04979738, - -0.049024884, - -0.06267286, - -0.06284121, - 0.031987894, - -0.08689449, - 0.044664543, - 0.076699525, - 0.06471937, - 0.028753767, - 0.02369647, - 0.011940286, - 0.041063324, - -0.0031249018, - -0.012605156, - 0.020917466, - 0.0698649, - -0.07892161, - -0.010968826, - -0.060116883, - 0.012380837, - -0.022275316, - -0.02007232, - 0.053651124, - 0.045564346, - 0.06287834, - -0.026206115, - 0.034378637, - 0.028786598, - -0.07342769, - -0.05178595, - -0.03394133, - 0.06494073, - -0.07682645, - 0.039480515, - 8.8730786E-4, - -0.035883103, - -0.02245836, - 0.051104713, - -0.02161596, - -0.0014411546, - 0.011840296, - 0.044061452, - 0.018550612, - 0.07816852, - 0.023765374, - -0.04192663, - 0.056223065, - -0.029935915, - -0.039640833, - -0.061091922, - 0.048074532, - 0.03252561, - 0.07752945, - 0.0374488, - -0.0938137, - -0.06071223, - -0.053990547, - -0.06630911, - 0.040193927, - 0.038531914, - -0.023115646, - -0.0046846615, - 0.025255106, - 0.074686274, - 0.10130572, - 0.06328507, - 0.017575556, - -0.040289026, - -0.013285351, - -0.06927493, - 0.08576633, - -0.003492294, - -0.041360997, - 0.036476493, - 0.04270745, - 0.060671005, - -0.0651591, - 0.014901469, - -0.04655241, - -0.006525806, - -0.037813406, - -0.02792913, - 0.0472146, - -0.07142533, - 0.06478618, - 0.0716035, - -0.04885643, - 0.073330306, - -0.055672232, - 0.057761118, - 0.07276108, - -0.076485656, - -0.06970012, - -0.0692586, - -0.051378023, - -0.06273683, - 0.03469511, - 0.05773398, - -0.08031594, - 0.10501066, - 7.310874E-4, - 0.050745558, - -0.016756695, - -0.031716295, - 0.0050844094, - 0.031707063, - -0.039744828, - 0.05837439, - -0.09262242, - -0.04054004, - -0.0075583286, - 0.061934657, - 0.035783943, - -0.055616625, - -0.047291458, - -0.027218537, - -0.011617415, - 0.026992036, - -0.03259098, - 0.08588563, - -0.015476044, - -0.04406553, - -0.058256716, - -0.049162734, - -0.018606737, - 0.02703335, - 0.023426747, - 0.028659008, - 0.014869456, - 0.04368826, - 0.03709602, - 0.0059531354, - 0.012405994, - 0.023295961, - 0.09050855, - -0.025719937, - -0.038713705, - 0.02654418, - -0.07065918, - -0.04294843, - 0.050370634, - -0.0033409365, - 0.052235987, - 0.07693816, - 0.043221552, - 0.07534102, - -0.048658077, - 0.06533618, - -0.016787754, - 0.034524675, - -0.0312765, - 0.05486932, - 0.06286382, - 0.03278902, - -0.06772777, - -0.087687664, - -0.0665437, - 0.032016467, - 0.066101246, - -0.11844821, - -0.032777846, - -0.053238686, - -0.015841002, - -0.067591116, - -0.048692815, - -0.013267198, - 0.09390532, - -0.029956369, - -0.021315884, - -0.03857401, - 0.03929155, - -0.023058, - 0.051734913, - -0.023478175, - 0.035602726, - -0.08242782, - 0.058339056, - 0.045796614, - 0.05448777, - -0.047254823, - 0.020266606, - -0.08056925, - 0.0015524789, - -0.041604258, - 0.00718068, - -0.044556983, - 0.02106678, - 0.04749506, - -0.01840031, - 0.023407241, - 0.070747316, - -0.04295862, - -0.07703961, - -0.0660327, - 0.013576343, - -0.023668775, - 0.056404322, - 0.09587012, - 0.05701044, - -0.036240827, - -0.004225128, - 0.0067939283, - 0.035346694, - 0.026707595, - 0.017638108, - -0.032440145, - 0.04708931, - 0.012399647, - 0.07325736, - 0.027942428, - -0.08172854, - -0.07065871, - 0.033890083, - -0.033598673, - -0.08178952, - 0.028348992, - 0.04411821, - -0.044644725, - 0.03074351, - 0.0935692, - -0.04762361, - 0.051226508, - -0.08009367, - -0.03847554, - 0.016323369, - 0.038776945, - -0.059975337, - -0.057062503, - 0.010849399, - -0.030187564, - -0.026308322, - -0.067967005, - -0.079719126, - -0.08646553, - -0.09048591, - -0.018597756, - 0.0047154897, - 0.058588482, - -0.09175631, - -0.08307076, - -0.035472285, - 0.009541795, - -0.026162423, - 0.03335252, - 0.018381111, - -0.015803808, - 0.021074254, - -0.010327698, - 0.025227644, - 0.06197503, - -0.059137702, - -0.018673804, - 0.00707259, - -0.019355131, - 0.026796991, - 0.025893785, - 0.0685412, - -0.06301929, - 0.003187423, - 0.029026637, - -0.019066911, - 0.09354283, - 0.1061943, - 0.053696748, - -0.0016658951, - -0.0030081598, - -0.028984388, - -0.037768397, - -0.035445668, - -0.026019065, - 0.028805656, - 0.021448314, - -0.059069104, - -0.06226507, - -0.05508101, - 0.022365203, - 0.09221683, - -0.07698258, - -0.055819187, - 0.061300304, - 0.05965072, - 0.029480126, - 0.057750076, - 0.05180143, - -0.0159732, - -0.0823228, - 0.09240897, - -0.08318623, - 0.002020457, - 0.010953976, - -0.09685372, - 0.05271347, - -0.04232834, - 0.061398283, - 0.044973806, - -0.02088832, - 0.044399235, - -0.014687839, - 0.06304118, - -0.022936989, - -0.033005796, - 0.074231274, - 0.023939423, - -0.087914266, - 0.036014125, - 0.0062753465, - -0.03355067, - 0.036039222, - 0.012712498, - 0.057161637, - 0.05654562, - -0.018600527, - -0.035825036, - 0.06950757, - 0.05828935, - 3.8511172E-4, - -0.008722925, - -0.0522819, - -0.10943554, - -0.033790745, - -0.03357093, - -0.031342223, - -0.07834354, - 0.032603115, - 0.026984481, - -0.02969966, - -0.048259087, - -0.012163297, - 0.007793295, - 0.05574152, - -0.022157356, - -0.03623348, - 0.037055306, - -0.033247784, - -0.0070533184, - -0.057643052, - 0.08567554, - -0.07278431, - -0.06556353, - 0.0308075, - 0.052940007, - -0.0566871, - 0.0287218, - -0.06409354, - -0.0627855, - 0.06254832, - -0.027221028, - -0.049813032, - 0.03935744, - 0.07234624, - -0.09398941, - 0.011342199, - 0.028675176, - -0.022932779, - 0.009481765, - -0.022316003, - -0.015413267, - 0.039174553, - 0.061736017, - -0.04229645, - -0.052905895, - 0.018588098, - 0.070939854, - 0.0748456, - 0.08648295, - -0.036223643, - 0.008473833, - 0.053857446, - -0.07680301, - 0.0785199, - 0.03982, - -0.039509695, - 0.03373825, - -0.063460656, - -0.038993217, - -0.073084034, - 0.062789686, - -0.081148736, - -0.035036374, - 0.0754924, - 0.087299235, - 0.04096056, - 0.027776068 - ] - }, - { - "start_offset": 1281, - "end_offset": 2685, - "embeddings": [ - 0.035266396, - -0.044093177, - -0.04158629, - -0.045926083, - 0.06521479, - -0.050932676, - 0.03961649, - 0.037828345, - 0.025232289, - 0.029732272, - 0.034696255, - -8.805868E-4, - 0.053202488, - -0.0047244085, - -0.037418325, - 0.0770543, - 0.105328426, - -0.036611717, - -0.039531372, - -0.082817726, - 0.021342339, - -0.01843601, - -0.042259317, - 0.06317797, - 0.036926534, - 0.069380246, - -0.059219223, - 0.043066744, - -0.006286799, - -0.06797077, - -0.042236328, - -0.036919896, - 0.034179892, - -0.026980922, - 0.051384695, - 0.03826208, - -0.012975077, - -0.025295, - 0.015923942, - -0.027602347, - -0.022515642, - -5.98229E-4, - 0.06122002, - 0.050380763, - 0.04684541, - 0.08975921, - -0.03755087, - 0.046912387, - -0.038697798, - -0.06988436, - -0.05219296, - 0.041337684, - 0.023435602, - 0.023100449, - 0.0352068, - -0.060556572, - -0.042356305, - -0.04503575, - -0.07377149, - 0.084542595, - 0.028644886, - -0.024366854, - -0.009185593, - 0.01255741, - 0.06999743, - 0.09439326, - 0.03800093, - -0.008208419, - -0.09673358, - 0.0023752274, - -0.07626475, - 0.098563485, - -0.012569254, - -0.08954541, - -0.010908005, - 0.016228944, - 0.05984263, - -0.051004995, - 0.024147974, - -0.050623365, - -0.01668758, - -0.007899899, - -0.029833568, - 0.034388572, - -0.03505155, - 0.08271141, - 0.08569518, - -0.053716324, - 0.06806682, - -0.067159526, - 0.043537326, - 0.09806787, - -0.041304354, - -0.05103136, - -0.109280586, - -0.06120091, - -0.09363793, - 0.032154918, - 0.12145496, - -0.049101993, - 0.07359592, - -0.010511772, - 0.074003994, - -0.013990566, - -0.026140982, - 0.052602872, - 0.09067435, - -0.070553906, - 0.057253607, - -0.048433788, - -0.024026526, - 0.018851176, - 0.04090621, - 0.058670815, - -0.08735305, - -0.022817774, - -0.042838365, - -0.016657954, - 0.03224679, - -0.01952135, - 0.016957905, - -2.0869492E-4, - -0.0039428347, - -0.05186959, - -0.062616155, - -0.056938402, - 0.00882266, - 0.055156156, - 0.03221514, - 0.026071686, - 0.073993444, - 0.060973227, - 0.040219847, - 0.030080495, - 0.074190594, - 0.10667069, - -0.035753082, - -0.031658202, - 0.024792355, - -0.056956623, - -0.04320206, - 0.042175233, - -0.04459597, - 0.063075, - 0.03682348, - 0.087945856, - 0.060606126, - -0.02543529, - 0.101843245, - -0.02052844, - 0.065993346, - -0.01580399, - 0.01996002, - 0.025750767, - 0.044288505, - -0.055157375, - -0.0834102, - -0.07820265, - 0.01860491, - 0.052071907, - -0.082538106, - -0.06682723, - -0.031070147, - -5.8769673E-4, - -0.05546835, - -0.041754596, - 0.007750717, - 0.06550786, - -0.024858464, - -0.018027157, - -0.070528544, - 0.04311053, - -0.04646167, - 0.038272627, - -0.023141516, - 0.035724208, - -0.044601943, - 0.031177005, - 0.060686704, - -0.008791896, - -0.045239996, - -0.0015549486, - -0.023560282, - -0.02124949, - -0.028758224, - -0.01994061, - -0.031099308, - 0.033113, - 0.04315839, - -0.014818203, - -0.016493127, - 0.03928858, - -0.049371842, - -0.057269108, - -0.07144285, - 0.045010682, - -0.02822895, - 0.026698994, - 0.08181065, - 0.0497983, - -0.0033907534, - -0.023786934, - 0.013289109, - 0.011108559, - 0.075379916, - 0.012320797, - -0.045297462, - 0.09245994, - -0.027429234, - 0.058199212, - 0.06857553, - -0.0705278, - -0.055046707, - 0.025127407, - -0.044880733, - -0.07819047, - -0.016903652, - 0.031777192, - -0.027202426, - 0.033661053, - 0.082595035, - -0.010536667, - 0.067396104, - -0.048291907, - -0.038250096, - 0.009253138, - 0.040732533, - -0.06330689, - -0.074753396, - 0.04644269, - -0.029993957, - -0.033248927, - -0.053877644, - -0.098819815, - -0.0260293, - -0.030682972, - -0.034318104, - -0.014064486, - -0.020334287, - -0.12791014, - -0.017047742, - -0.052973263, - 0.017977173, - -0.04006773, - 0.066867575, - -0.07052264, - -0.02385362, - 0.028173303, - -0.07004571, - 0.053027462, - 0.039910827, - -0.026693301, - -0.07183149, - -0.073637374, - 0.008942395, - 0.012631494, - 0.040236488, - 0.07312884, - -0.1052349, - 0.013788912, - 0.05933606, - -0.012417836, - 0.07844875, - 0.035665687, - 0.0692123, - 0.011978119, - 0.0032255524, - -0.02082568, - -0.027911682, - -0.008114962, - -0.100171834, - 0.012006536, - 0.027355125, - -0.069779284, - -0.06982269, - -0.02499225, - 0.06460924, - 0.10172508, - -0.036987256, - -0.027838582, - 0.06524349, - 0.03478602, - 0.047589943, - 0.0034753575, - 0.035028856, - 0.03955437, - -0.056392808, - 0.097454645, - -0.067250304, - -0.016183723, - -0.010761581, - -0.046665948, - 0.052830804, - -0.06562526, - 0.0143448245, - 0.035826858, - -0.030075911, - 0.074224986, - -0.01484229, - 0.047223467, - -0.05010028, - -0.08323114, - 0.024850823, - 0.0035780836, - -0.04660368, - 0.012318496, - 0.035511326, - -0.006625753, - 0.023968346, - 0.04152267, - 0.066447295, - 0.031807587, - -0.026121954, - -0.06298641, - 0.09144068, - 0.07982457, - -0.047639504, - -0.011746696, - -0.03417992, - -0.066457696, - -0.015668094, - -0.036196046, - -0.0029406173, - -0.054462895, - 0.0029062356, - 0.019851439, - 0.0064928187, - -0.06603669, - 0.016133538, - 0.0441623, - -0.013663719, - -0.027901169, - -0.05862742, - 0.035473794, - -0.080742985, - -0.012147599, - -0.06269955, - 0.045475967, - -0.07024215, - -0.09113673, - 0.018147662, - 0.037072584, - -0.011495025, - 0.049087547, - 0.00970628, - -0.043941073, - 0.052213665, - -0.027107846, - -0.05408287, - 0.04391075, - 0.05903725, - -0.11579457, - 0.0179941, - 0.023727184, - -0.027765218, - 0.058974497, - -0.041185096, - -0.06411593, - 0.05297974, - 0.014402285, - -0.07491701, - -0.046273973, - 0.025595015, - 0.072552234, - 0.07913544, - 0.05780724, - 0.010108354, - -0.032680638, - 0.07236567, - -0.059348762, - 0.07916222, - 0.06330368, - -0.040674247, - 0.014580703, - -0.056963094, - -0.05973973, - -0.028593862, - 0.054875106, - -0.083951905, - -0.030538274, - 0.04507664, - 0.057579767, - 0.047284584, - 0.029037142 - ] - }, - { - "start_offset": 2660, - "end_offset": 3932, - "embeddings": [ - 0.060263444, - -0.011627793, - -0.07406454, - -0.061137985, - 0.035276245, - -0.06492958, - 0.036304567, - 0.03849267, - 0.032589767, - 0.034697585, - 0.055276874, - 0.0067610983, - 0.07107068, - -0.028453767, - -0.023335157, - 0.066190325, - 0.09514554, - -0.031573914, - -0.036566608, - -0.03254594, - 0.01258663, - -0.008238347, - -0.024652604, - 0.058704935, - 0.029146092, - 0.0538354, - -0.033388253, - 0.035337757, - 0.048961233, - -0.06575967, - -0.060514227, - -0.054762013, - 0.049676932, - -0.062150035, - -0.019077798, - 0.018297857, - -0.043477535, - -0.06992983, - 0.041489013, - -0.06091549, - 0.00857616, - 0.0013787356, - 0.059843466, - 0.065656655, - 0.07694915, - 0.07400389, - -0.008740612, - 0.02598118, - -0.04293424, - -0.029819168, - -0.057130232, - 0.08674767, - 0.0020843677, - 0.094413035, - 0.026790254, - -0.07488432, - -0.06260386, - -0.059874497, - -0.022945922, - 0.07328087, - 0.0012629362, - -0.014891515, - -0.017552191, - 0.04158861, - 0.074740976, - 0.13079657, - 0.03465537, - 0.033060353, - -0.071494736, - -0.042101286, - -0.09333479, - 0.075504355, - -0.048976846, - -0.07538883, - 0.016815975, - 0.014265034, - 0.04265424, - -0.055298902, - 0.021028202, - -0.043243185, - -0.035213232, - -0.03872826, - -0.03735794, - -0.009753857, - -0.06591585, - 0.06382551, - 0.070999734, - -0.07432682, - 0.051665448, - -0.06200163, - 0.035289973, - 0.052576542, - -0.08547946, - -0.051438782, - -0.06883237, - -0.04034897, - -0.1139505, - 0.029103009, - 0.056813173, - -0.033878006, - 0.065993756, - 0.0012909115, - 0.030890198, - -0.026131464, - -0.042535, - 0.044831734, - 0.075214975, - -0.045039084, - 0.056481812, - -0.052748743, - -0.042459268, - 0.016207209, - 0.032704834, - 0.04342557, - -0.031859122, - -0.037544478, - -0.023973966, - -0.056660555, - 0.03458018, - -0.043174002, - 0.07610799, - -0.040468093, - -0.041871496, - -0.04984353, - -0.040546015, - -0.019524354, - 0.04170828, - 0.020450952, - 0.0404415, - 0.03985574, - 0.032101743, - 0.05156037, - 0.048545454, - 0.03334057, - 0.025009904, - 0.090053804, - -0.030840183, - 0.0017696177, - 0.01567415, - -0.04152217, - -0.031758398, - 0.020865917, - -0.05755524, - 0.04980784, - 0.050742626, - 0.07122176, - 0.06281647, - -0.012783542, - 0.08377948, - -0.029796185, - 0.017047247, - 0.011766123, - 0.03557249, - 0.019037597, - 0.028088165, - -0.07208148, - -0.08005564, - -0.057871744, - 0.0153855365, - 0.054635677, - -0.05614729, - -0.031374976, - -0.06079491, - -0.041638877, - -0.055767294, - -0.048497472, - -0.007389678, - 0.012500725, - 0.02392964, - -0.03444656, - -0.032773327, - 0.050030876, - -0.062147807, - 0.03894452, - 0.005381243, - 0.005100098, - -0.082184665, - 0.01259893, - 0.06914528, - 0.0502573, - -0.014370648, - -0.039859537, - -0.06393138, - -0.061919075, - -0.014192415, - -0.032273103, - -0.0464307, - -7.1235467E-4, - 0.051684704, - -0.006423554, - 0.0010265269, - 0.057130195, - -0.044715635, - -0.08753112, - -0.060454912, - 0.04602993, - -0.009173136, - 0.030031096, - 0.05415974, - 0.040149722, - -0.030073693, - -0.0026639393, - 0.06262825, - 0.0073858122, - 0.07543514, - 0.013202129, - -0.055555925, - 0.076006316, - 0.0069068773, - 0.037352845, - 0.05844025, - -0.087049164, - -0.0934209, - 0.021478496, - -0.06904104, - -0.035960656, - 0.012564326, - 0.08203622, - -0.0589588, - 0.038763568, - 0.059626605, - -0.0015563822, - 0.056733213, - -0.06597729, - -0.0487247, - 0.030533105, - 0.059536766, - -0.043689486, - -0.044405177, - 0.039805703, - -0.033027582, - -0.034072082, - -0.080049135, - -0.08942587, - 0.019459073, - -0.044563998, - -0.06931994, - 0.021550108, - 0.022951653, - -0.051044974, - -0.03676219, - -0.050016202, - 0.03538716, - -0.06436871, - 0.09116231, - -0.03250418, - -0.008333591, - 0.02689493, - -0.023252478, - 0.04825159, - 0.07439804, - -0.08796822, - -0.04385184, - -0.05042988, - -0.056784004, - 0.057135444, - 0.055787697, - 0.056427166, - -0.09837734, - -0.0036608325, - 0.013839507, - -0.020212527, - 0.09865649, - 0.080563836, - 0.07525103, - 0.033415828, - -0.02267602, - -0.067864396, - -0.05965757, - -0.010466497, - -0.047837727, - 0.017926434, - 0.032667693, - -0.069811225, - -0.011690649, - -0.044193, - 0.023269301, - 0.07142345, - -0.0031622057, - -0.0047916253, - 0.07077121, - 0.03767678, - 0.03410683, - 0.036370695, - 0.01696176, - -0.026317174, - -0.008320507, - 0.09212631, - -0.07694487, - -0.034243643, - 0.0110022295, - -0.060418822, - 0.07019466, - -0.051362276, - 0.078166254, - 0.055226926, - -0.04018289, - 0.063233584, - -0.032110535, - 0.08297619, - -0.009597479, - -0.057851054, - 0.042411964, - 0.01997834, - -0.07460758, - 0.061238132, - 0.0050869896, - 0.023704918, - 0.03991232, - 0.07121017, - 0.067201145, - 0.04065065, - -0.05990329, - -0.04676335, - 0.08255157, - 0.039478876, - -0.05370604, - -0.015417656, - -0.061638564, - -0.113423236, - -0.020872636, - -0.06506326, - -0.019086778, - -0.07550901, - 0.023448454, - 0.031439524, - -0.018936215, - -0.061786037, - 0.06917624, - -0.016625067, - 0.04495578, - -0.05168137, - -0.06433023, - 0.019382514, - -0.030735377, - 0.010870069, - -0.05917494, - 0.033261493, - -0.04571641, - -0.078268915, - 0.03133073, - 0.04491661, - -0.036725685, - 0.05521663, - -0.02092035, - -0.04205282, - 0.035851613, - -0.0015220186, - -0.02102678, - 0.054027468, - 0.07405003, - -0.09111273, - 0.005834604, - 0.053133536, - -0.018385805, - 0.024131889, - -0.04136735, - -0.060419146, - 0.006526669, - 0.046679422, - -0.07396608, - -0.031180743, - 0.032524955, - 0.05950253, - 0.08502798, - 0.05705178, - 0.041140076, - 0.015673824, - 0.052156717, - 0.008876251, - 0.05783481, - 0.06875354, - -0.01914275, - 0.019451428, - 0.0017306518, - -0.09160311, - -0.06650555, - 0.06903168, - -0.11052152, - -0.08185994, - 0.0152816, - 0.056960557, - 0.06667231, - 0.042444445 - ] - }, - { - "start_offset": 3811, - "end_offset": 5053, - "embeddings": [ - 0.08132793, - -0.047893565, - -0.038560215, - -0.03994145, - 0.0558572, - -0.03973998, - 0.020470386, - 0.058355197, - 0.01980108, - 0.03896921, - 0.04879353, - -0.0074668517, - 0.05397047, - -0.010254351, - -0.042885937, - 0.08040558, - 0.091155075, - -0.052957732, - -0.035930026, - -0.03653066, - 0.013761402, - -0.018923452, - -0.04685841, - 0.04731581, - 0.027308341, - 0.020014657, - -0.04545417, - 0.028795317, - 0.04793647, - -0.0704067, - -0.042252712, - -0.05682541, - 0.066968046, - -0.09382263, - 0.02506045, - 0.019845745, - -0.015298284, - -0.044756494, - 0.032255, - -0.03357616, - -0.01634103, - 0.012012115, - 0.05378444, - 0.036496706, - 0.06764162, - 0.08833494, - -0.021727582, - 0.0363613, - -0.08750663, - -0.006557421, - -0.037404615, - 0.083952226, - -0.005245814, - 0.06731529, - 0.027517168, - -0.069114335, - -0.06600843, - -0.055819273, - -0.09175115, - 0.0908832, - 0.045391496, - -0.03755004, - 0.0018628142, - 0.015974216, - 0.034663454, - 0.07421443, - 0.045072228, - -0.0134752095, - -0.053152926, - -0.011296686, - -0.052672, - 0.064373136, - -0.009546203, - -0.08377613, - -0.0018304663, - 0.023774406, - 0.029625371, - -0.07841949, - 0.025992012, - -0.034211818, - -0.04341797, - -0.074051395, - -0.022789141, - -0.014875852, - -0.050796572, - 0.08730017, - 0.09586879, - -0.06974203, - 0.048677806, - -0.04995857, - 0.038378827, - 0.06020236, - -0.060032416, - -0.05082279, - -0.08157444, - -0.05524207, - -0.09547329, - 0.061129954, - 0.07330997, - -0.060067892, - 0.08218149, - -0.011082627, - 0.041907076, - -0.0016668623, - -0.020462176, - 0.0074657737, - 0.04153701, - -0.053815063, - 0.08984907, - -0.04856424, - -0.019359102, - 0.025180845, - 0.0580883, - 0.051315922, - -0.07716719, - -0.06010258, - -0.024739653, - -0.020786842, - 0.021310974, - -0.049855735, - 0.058490653, - -0.019344086, - -0.064905055, - -0.043594714, - -0.0414785, - -0.026626132, - 0.010384775, - 0.035636406, - 0.023757294, - 0.02353357, - 0.038512193, - 0.043469686, - 0.025641369, - 0.06005725, - 0.033108205, - 0.093584485, - -0.008513592, - 0.001993488, - 0.0266426, - -0.0135798985, - -0.058448963, - 0.030007407, - -0.03873391, - 0.012962885, - 0.03407742, - 0.052897573, - 0.048484456, - -0.0037075893, - 0.10519477, - -0.05359505, - 0.062401634, - -0.02432665, - 0.006226394, - 0.027923357, - 0.0724623, - -0.050624184, - -0.08479024, - -0.08688512, - 0.032354686, - 0.06821751, - -0.077089824, - -0.0014580968, - -0.04177363, - -0.027564395, - -0.0448798, - -0.042052064, - -0.009614605, - 0.07208001, - 7.672266E-4, - -0.075805336, - -0.05364635, - 0.06561775, - -0.032068495, - 0.04494038, - -0.044013828, - -0.0190166, - -0.022102332, - 0.034658328, - 0.050540138, - -0.01942592, - -0.020942092, - -0.02782304, - -0.065396436, - -0.04059357, - -0.049896274, - -0.0376796, - -0.043743063, - 0.040360678, - 0.07515184, - -0.018274747, - -0.009190847, - 0.055620983, - -0.041216724, - -0.073044226, - -0.05465287, - 0.010405976, - -0.013486699, - 0.02830836, - 0.06836122, - 0.020561688, - -0.01688864, - -0.020571496, - 0.04652389, - 0.020004654, - 0.060006775, - 0.00938477, - -0.05559232, - 0.08781834, - -0.025533192, - 0.052398734, - 0.057509296, - -0.09851155, - -0.09180138, - 0.038183447, - -0.06369883, - -0.054243114, - 0.020855743, - 0.10808265, - -0.04326038, - 0.023134552, - 0.088371366, - -0.03126334, - 0.044376496, - -0.07867371, - -0.03890121, - 0.051151622, - 0.037706945, - -0.03370568, - -0.008004474, - 0.041355547, - -0.023588097, - -0.026358435, - -0.04786497, - -0.108022444, - -0.04574715, - -0.03736998, - -0.048178125, - 0.034921553, - 0.06676284, - -0.060398124, - -0.024748335, - -0.02818482, - 0.02239888, - -0.07246388, - 0.04970122, - -0.010178895, - -0.010817003, - 0.05318733, - -0.050516233, - 0.04490196, - 0.057144474, - -0.031509876, - -0.06828971, - -0.057091262, - -0.041589297, - 0.034988903, - 0.05772322, - 0.08349064, - -0.07048785, - 0.02914558, - 0.037508357, - -0.018101186, - 0.09606959, - 0.09399272, - 0.033781327, - 0.026298832, - -0.007974394, - -0.04828518, - -0.030074345, - -0.008707313, - -0.06095452, - 0.0052815387, - 0.053281322, - -0.07403459, - -0.04375484, - -0.0024250182, - 0.030269688, - 0.08677468, - -0.044580005, - -0.023698311, - 0.09059957, - 0.03502518, - 0.039508294, - 0.03801833, - 0.051657647, - -0.023771202, - -0.021416105, - 0.08418382, - -0.07468558, - -0.022965085, - -0.037451513, - -0.070336066, - 0.07278321, - -0.06958301, - 0.061745293, - 0.034864236, - -0.05098527, - 0.075577505, - -0.01925352, - 0.028659336, - -0.01881169, - -0.09233528, - 0.052659664, - 0.046592344, - -0.08144535, - 0.04045172, - 0.021832049, - 0.01539719, - 0.036698546, - 0.048459183, - 0.0750458, - 0.03523083, - -0.093105264, - -0.042830218, - 0.08817936, - 0.05500005, - -0.03145603, - 0.002137886, - -0.09369107, - -0.0859627, - -0.00988302, - -0.03224872, - 0.009135905, - -0.07538188, - 0.01729995, - 0.05211995, - -0.028220842, - -0.09644254, - 0.08197546, - 0.021641405, - 0.044149674, - -0.02265579, - -0.03705849, - 0.0066629667, - -0.038971607, - 0.0077898037, - -0.07302501, - 0.050258975, - -0.031734023, - -0.05120743, - 0.006855154, - 0.03317757, - -0.054895062, - 0.020226864, - -0.028702717, - -0.054496907, - 0.03333692, - -0.01552826, - -0.024065949, - 0.034094118, - 0.06990785, - -0.11025783, - -0.022972278, - 0.094185725, - -0.034931783, - 0.045400895, - 0.0029167728, - -0.040711746, - 0.0069749537, - 0.02316794, - -0.07623587, - -0.032300122, - 0.040407263, - 0.056106865, - 0.084427394, - 0.09241687, - -0.014235544, - -9.3176577E-4, - 0.056472927, - -0.066110075, - 0.07017728, - 0.06319923, - -0.026196225, - 0.013847319, - -0.047189496, - -0.034471143, - -0.035234082, - 0.015169919, - -0.06258794, - -0.044817522, - 0.052238535, - 0.052592035, - 0.024454227, - 0.04652183 - ] - }, - { - "start_offset": 5013, - "end_offset": 6270, - "embeddings": [ - 0.050837185, - -0.058507636, - -0.08578978, - -0.07158996, - 0.062322024, - -0.06394126, - 0.033397503, - 0.066029586, - 0.059980292, - 0.014527764, - 0.027411256, - -0.019332865, - 0.09169677, - -0.028353753, - -0.024152989, - 0.026958432, - 0.06263654, - -0.057214282, - -0.01730705, - -0.06580778, - 0.012587115, - -0.0013240383, - -0.034304086, - 0.07279054, - 0.03153362, - 0.022333346, - -0.019766338, - 0.01765917, - 0.018127792, - -0.031060342, - -0.035549946, - -0.055531062, - 0.020338904, - -0.102598086, - 0.01697388, - 0.01325798, - -0.05225683, - -0.028536074, - 0.018755725, - -0.03648683, - 0.0047455817, - 0.007937342, - 0.05206842, - 0.07168695, - 0.08550893, - 0.0469701, - -0.053452007, - 0.050660927, - -0.028207462, - -0.038872562, - -0.044887412, - 0.0740998, - -0.013441051, - 0.07634305, - 0.0055091325, - -0.11408244, - -0.06909077, - -0.07962894, - -0.066142306, - 0.07568293, - 0.0025674207, - -0.080196865, - -0.006201128, - 0.00818501, - 0.07924847, - 0.10414052, - 0.042439207, - 0.035281047, - -0.040974326, - -0.04297422, - -0.024786443, - 0.06963027, - -0.016090378, - -0.077486746, - 0.013267866, - 0.0382188, - 0.075773925, - -0.045972046, - 0.021897435, - -0.057650458, - -0.026901621, - -0.047625203, - 0.0012063365, - 0.025827816, - -0.023581855, - 0.059192963, - 0.06759525, - -0.06503824, - 0.051352326, - -0.04751885, - 0.06295226, - 0.03710186, - -0.05161417, - -0.049769994, - -0.08769117, - -0.045511324, - -0.051784497, - 0.056573063, - 0.040720508, - -0.035331022, - 0.073139556, - -8.214206E-4, - 0.037490595, - -0.0021819966, - -0.024999384, - 0.019722067, - 0.024325203, - -0.044025563, - 0.06545914, - -0.019343818, - -0.0023573453, - 0.0018968938, - 0.06038538, - 0.02333629, - -0.06574865, - -0.027746813, - -0.025081333, - -0.014503653, - 0.02887482, - -0.034452263, - 0.07113403, - -0.03859757, - -0.06710839, - -0.0383765, - -0.06811556, - 0.0061613885, - 0.034110006, - 0.05640678, - 0.06142383, - 0.009073967, - 0.043047454, - 0.03466423, - 0.027530612, - 0.032211494, - 0.053615883, - 0.07377551, - -0.01758648, - -0.02144349, - 0.03956204, - -0.031308886, - -0.062522896, - 0.07004273, - -0.041059777, - 0.03381151, - 0.096379966, - 0.059807573, - 0.076913215, - 7.038924E-4, - 0.081829004, - -0.06641827, - 0.044492118, - -0.036664132, - 0.08141791, - 0.039923627, - 0.079390235, - -0.05483655, - -0.092164926, - -0.07556358, - 0.024775334, - 0.039525755, - -0.052411165, - -0.044712305, - -0.1251298, - 0.019936236, - -0.05971529, - -0.071407795, - -0.013429681, - 0.045429856, - 9.2904486E-7, - -0.011094936, - -0.053897448, - 0.05120436, - -0.051203646, - 0.05109921, - -3.9564449E-4, - -0.0018849113, - -0.04667166, - 0.051931337, - 0.07190472, - 0.03911436, - 0.0045251944, - -0.048008155, - -0.03397076, - -0.028034845, - -0.048654392, - -0.02667819, - -0.04844982, - 0.04652294, - 0.08667334, - -0.03595206, - 0.0059883194, - 0.04574355, - -0.049042065, - -0.0949724, - -0.0883229, - 0.022961965, - 0.0010751986, - 0.034764428, - 0.07906372, - 0.063135885, - 0.011506904, - -0.01975833, - 0.036684997, - 0.060913093, - 0.045704674, - 0.007864406, - -0.10908467, - 0.05677562, - -0.011089532, - 0.038626347, - 0.009512805, - -0.064039044, - -0.072748266, - 0.077210315, - -0.038597148, - -0.035940252, - 0.028666161, - 0.07342884, - -0.05140841, - 0.03324692, - 0.087146066, - -0.063568234, - 0.046904817, - -0.101345256, - -0.089092165, - 0.020936692, - 0.03865168, - -0.05066454, - -0.020703398, - 0.037939124, - -0.069670096, - -0.04573288, - -0.042975515, - -0.08133061, - -0.04999254, - -0.07754444, - -0.015807157, - 0.005468936, - 0.058917798, - -0.047519706, - -0.011129669, - -0.023593048, - 0.017224371, - -0.08876406, - 0.021552147, - -0.0042216736, - 3.2073245E-4, - 0.020970272, - -0.018367162, - 0.05507523, - 0.049186505, - -0.053686555, - -0.05892317, - -0.04681065, - -0.0346258, - 0.025476422, - 0.018746119, - 0.07847266, - -0.061995696, - 0.054043338, - 0.05290739, - -0.03922319, - 0.09967812, - 0.11260788, - 0.079831325, - 0.038233027, - -0.007090767, - -0.025567437, - -0.059230927, - -0.0053755366, - -0.05934471, - 0.019243969, - 0.028365586, - -0.092337005, - -0.042283885, - -0.02478212, - 0.036973756, - 0.06046009, - -0.08319817, - -0.03466979, - 0.0052572396, - 0.03651634, - 0.0098519325, - 0.054537416, - 0.106752776, - -0.03245272, - -0.021710223, - 0.067718424, - -0.0716523, - -0.0467586, - 0.04351528, - -0.06902318, - 0.0840498, - -0.06641164, - 0.049778968, - 0.068722665, - 0.006945258, - 0.052571226, - -0.018321687, - 0.08851911, - -0.06484523, - -0.05621622, - 0.0138798375, - 0.062657684, - -0.044570502, - 0.04102728, - 0.018748704, - -0.00942585, - 0.031132046, - 0.028199397, - 0.04842188, - 0.05593715, - -0.059101623, - -0.06402159, - 0.098776296, - 0.02233127, - -0.026724212, - -0.0065241847, - -0.04349072, - -0.034313653, - 0.0035007112, - -0.05192231, - -0.038924325, - -0.06474185, - 0.015219527, - 0.015206849, - -0.006182916, - -0.047223445, - 0.03093224, - 0.0028494631, - 0.029578412, - -0.03084317, - -0.064933576, - 0.04518858, - -0.039695684, - 0.00936517, - -0.057235852, - 0.07411994, - -0.03560979, - -0.058608506, - 0.011952328, - 0.038545735, - -0.0027342425, - 0.034396514, - -0.05941442, - -0.059142824, - 0.07352255, - -0.043796647, - -0.02323201, - 0.021158574, - 0.04281619, - -0.06509553, - 0.025277078, - 0.028309572, - -0.025768865, - 0.017667482, - -0.054695044, - -0.0071169212, - 0.024850225, - 0.045802698, - -0.06463908, - -0.06887592, - 0.015381043, - 0.07519754, - 0.057192106, - 0.04958389, - -0.0055669746, - 0.011448934, - 0.03116414, - -0.047596138, - 0.0854336, - 0.04283707, - -0.0740198, - 0.012606065, - -0.06125597, - -0.051641334, - -0.08642954, - 0.051201824, - -0.06496548, - -0.052257292, - 0.042111978, - 0.06265747, - 0.020205691, - 0.030658716 - ] - }, - { - "start_offset": 6143, - "end_offset": 7446, - "embeddings": [ - 0.0424085, - -0.034002542, - -0.03464202, - -0.050363787, - 0.07952863, - -0.06934173, - 0.032258246, - 0.0323823, - 0.058361948, - 0.024646914, - 0.033364307, - 0.014893917, - 0.082809135, - -0.029873388, - -0.029152617, - 0.04554002, - 0.0795821, - -0.036626082, - -0.0474332, - -0.07305637, - 0.013581792, - -0.004326934, - -0.014103911, - 0.034649894, - -0.0026006806, - 0.02861443, - -0.04941399, - 0.04220857, - 0.03800667, - -0.08277502, - 0.0030204614, - -0.053834133, - 0.056124337, - -0.049811907, - 0.039426923, - 0.020071387, - -0.058887776, - -0.028534504, - 0.017018566, - -0.058147434, - -0.004793465, - 0.044247996, - 0.09460399, - 0.015196105, - 0.06281946, - 0.044713628, - -0.060649756, - 0.027246455, - -0.076060586, - -0.049838327, - -0.08404265, - 0.029550698, - -0.03708172, - 0.07957659, - 0.005638496, - -0.06591597, - -0.06454032, - -0.031200824, - -0.08628952, - 0.063782215, - 0.07779158, - -0.030862262, - -5.435849E-4, - 0.019658469, - 0.057832543, - 0.07795239, - 0.0381484, - -7.929322E-4, - -0.0592228, - -0.005782202, - -0.030597664, - 0.087376595, - -0.010526408, - -0.048925165, - -0.02034168, - 0.03517407, - 0.11462333, - -0.045529578, - 0.03299401, - -0.037767082, - -0.042070027, - -0.058737356, - -0.024921589, - 0.034654282, - -0.055172887, - 0.06289939, - 0.020921186, - -0.05699275, - 0.09581658, - -0.06115032, - 0.08512388, - 0.054141954, - -0.0934276, - -0.105145365, - -0.08745115, - -0.06042352, - -0.07095655, - 0.055074938, - 0.0759865, - -0.0045393603, - 0.06166128, - -0.0054426217, - -0.0013491446, - 0.020781914, - -0.013829525, - 0.012210793, - 0.0570243, - -0.026055835, - 0.050172452, - -0.0491802, - -0.03582268, - 0.0012494406, - 0.040490862, - 0.040501244, - -0.098037206, - -0.039755426, - -0.022896642, - 0.003485195, - 0.016366435, - -0.026002685, - 0.06318523, - -0.050691966, - -0.09513729, - -0.064722195, - -0.06132966, - -0.020495446, - 0.014939301, - 0.054761756, - 0.028909337, - -0.0023375573, - 0.042052656, - 0.022837669, - 0.0230999, - 0.03036407, - 0.018764673, - 0.072496034, - -0.036595833, - -0.036863085, - 0.028396215, - -0.091672495, - -0.08657466, - 0.047359336, - -0.055880774, - 0.0070424355, - 0.069609754, - 0.043904763, - 0.07389961, - -0.0059867557, - 0.116695836, - -0.03913718, - 0.036678135, - -0.010901363, - 0.08819442, - 0.03855831, - 0.07974421, - -0.051924232, - -0.10385839, - -0.033763383, - 0.019493395, - 0.050680365, - -0.058339395, - -0.02083137, - -0.08609875, - 0.017414644, - -0.063257225, - -0.056500446, - 0.023052368, - 0.04622413, - -0.018110551, - -0.007981176, - -0.024779806, - 0.0448911, - -0.08686634, - 0.06575812, - -0.04816167, - 0.049937073, - -0.04870519, - 0.078450456, - 0.06596584, - 0.026573703, - -0.054720048, - -0.016695132, - -0.06281992, - -0.033874605, - -0.034129698, - -0.018373003, - -0.050729766, - 0.037208032, - 0.08663066, - 0.0057553193, - 0.018936101, - 0.0683749, - -0.019277481, - -0.111216776, - -0.08299779, - 0.064380944, - -0.023994485, - 0.02228393, - 0.037532013, - 0.027998803, - 0.010780377, - -0.02866339, - 0.035218086, - 0.040947795, - 0.047251962, - 0.022822948, - -0.04361859, - 0.03929657, - -0.02838609, - 0.06326206, - 0.061787914, - -0.06487332, - -0.05326772, - 0.08467877, - -0.037987698, - -0.030701924, - 0.03693124, - 0.079549454, - -0.06695752, - 0.038511194, - 0.059876252, - -0.04255189, - 0.04926685, - -0.06254431, - -0.056073554, - 0.0059021385, - 0.06375891, - -0.028473105, - -0.020516206, - 0.053688798, - -0.0505003, - -0.013776076, - -0.056746498, - -0.074674286, - -0.036429465, - -0.078277834, - -0.033130404, - 0.026524864, - 0.010027121, - -0.052846454, - -0.03245234, - -0.0045730877, - 0.06279463, - -0.09209112, - 0.030202646, - -0.027974173, - -0.018735087, - 0.0051772078, - -0.034461137, - 0.031503055, - 0.024202514, - -0.0384219, - -0.028417397, - -0.0141932685, - -0.01493018, - 0.05603126, - 0.032856, - 0.0636288, - -0.08880921, - 0.0027978476, - 0.07799859, - -0.0328014, - 0.1109901, - 0.103224635, - 0.021524789, - 0.06495574, - 0.008971255, - -0.025503872, - -0.05471651, - -0.037969336, - -0.052947987, - 0.025896605, - 0.040142477, - -0.04655958, - -0.037604652, - -0.04057517, - 0.024616593, - 0.10586181, - -0.018084457, - -0.045486886, - 0.043346837, - 0.040528644, - 0.07145432, - 0.06723152, - 0.0444014, - 0.039035454, - -0.01685273, - 0.09862476, - -0.04053366, - -0.011219273, - 9.4339694E-4, - -0.04893209, - 0.08255836, - -0.06254635, - 0.0643953, - 0.057366677, - -0.035574544, - 0.05627519, - -0.053370558, - 0.07825556, - -0.0464488, - -0.06944344, - 0.06384285, - 0.022012226, - -0.059294943, - 0.015924655, - 0.015040029, - -0.024862552, - 0.0372234, - 0.07461155, - 0.037966266, - 0.05571149, - -0.062487237, - -0.05230138, - 0.09539987, - 0.050107345, - -0.045335423, - -0.008107003, - -0.04972419, - -0.053539097, - -0.022092147, - 0.0025375162, - -0.034666307, - -0.02502986, - -0.0051417495, - 0.051072195, - 0.0013976014, - -0.05035485, - 0.032701, - 0.029351933, - 0.030166088, - -0.056991193, - -0.05375353, - 0.046652608, - -0.0428863, - -0.029472742, - -0.052559793, - 0.091564216, - -0.080590494, - -0.0837016, - -0.019702932, - 0.039997194, - -0.006878238, - 0.03106036, - 0.0039084614, - -0.0647739, - 0.047937315, - -0.04196034, - -0.016512591, - 0.002820003, - 0.06303794, - -0.08405546, - 0.026794465, - 0.027069453, - -0.01786329, - 0.014802783, - -0.05162349, - -0.013761013, - -0.008544942, - 0.058489725, - -0.04009345, - -0.07866012, - 0.050363623, - 0.03921136, - 0.10168464, - 0.017203555, - -0.036566544, - -0.0041820146, - 0.017140131, - -0.04071419, - 0.028168127, - 0.04408699, - -0.051891476, - 0.018359438, - -0.05747516, - -0.042995404, - -0.050385248, - 0.016142845, - -0.097052485, - -0.054681405, - 0.015732065, - 0.04252675, - 0.04927429, - 0.034856237 - ] - }, - { - "start_offset": 7274, - "end_offset": 8428, - "embeddings": [ - 0.053351756, - -0.016210953, - -0.07376261, - -0.053941812, - 0.02817351, - -0.049927928, - 0.037769757, - 0.024953691, - 0.08055997, - 0.032674763, - 0.052936487, - 0.036146153, - 0.09430347, - -0.0028838688, - -0.007466441, - 0.023164729, - 0.10583723, - -0.031802896, - -0.041414317, - -0.0475711, - 0.009346337, - -0.0023871146, - -0.02213494, - 0.050703954, - 0.035117928, - 0.049729533, - -0.041396488, - 0.040562224, - 0.0072581097, - -0.08263742, - -0.0562156, - -0.015488454, - 0.05251555, - -0.093467265, - 0.023409631, - 0.025775665, - -0.044880413, - -0.049109295, - 0.047048803, - 0.0037931658, - -0.0067197834, - 0.06803116, - 0.07420838, - -5.630403E-4, - 0.081702, - 0.06873878, - -0.0719469, - 0.07724739, - -0.05212626, - -0.042729367, - -0.042923346, - 0.03461211, - 0.0384493, - 0.07852812, - 0.010787158, - -0.08513074, - -0.061220147, - -0.064391315, - -0.05105939, - 0.052274473, - 0.051858254, - -0.025238348, - -0.00587187, - 0.027783165, - 0.08390886, - 0.09118287, - 0.0045411596, - -0.007192923, - -0.03402139, - -0.0055287075, - -0.023308607, - 0.048499316, - 0.0056659714, - -0.055594128, - 0.006816471, - 0.06142901, - 0.069629386, - -0.06880756, - 0.03697912, - 4.030213E-4, - -0.016491663, - -0.04839326, - -0.07392797, - 0.043547455, - -0.056421243, - 0.04223018, - 0.08332315, - -0.067911245, - 0.090487525, - -0.055714566, - 0.08206281, - 0.06703987, - -0.08389162, - -0.057403725, - -0.08070137, - -0.08085191, - -0.06221053, - 0.022357801, - 0.05380439, - -0.057247546, - 0.082033284, - -0.040765326, - 0.013981313, - -0.0040798467, - -0.026184458, - 0.041849125, - 0.0670039, - -0.054438762, - 0.05614216, - -0.042283792, - -0.011577375, - -0.005841353, - 0.053594112, - 0.046762522, - -0.052612707, - -0.057888422, - -0.041523386, - -0.024746502, - -0.0075298445, - -0.064313106, - 0.07056589, - -0.060802132, - -0.066174984, - -0.028887944, - -0.045796074, - -0.032927513, - 0.020563344, - 0.03263002, - 0.062557735, - 0.017696919, - 0.07386037, - 0.03261784, - 0.049800515, - 0.030138545, - 0.08249261, - 0.09115441, - -0.042155825, - -0.03988317, - 0.020776471, - -0.051469974, - -0.08725858, - 0.03421217, - -0.05013289, - 0.013482718, - 0.064455256, - 0.03295194, - 0.05072303, - 0.006866378, - 0.07478394, - -0.08232063, - 0.019163597, - 0.004971397, - 0.04126514, - 0.058498725, - 0.051773094, - -0.075701, - -0.10187357, - -0.04737017, - 0.024935009, - 0.05112209, - -0.06950842, - -0.043909222, - -0.08784876, - 0.024858471, - -0.09546347, - -0.066443644, - -0.039961666, - 0.038705625, - 0.024331694, - -8.98396E-4, - -0.05572306, - 0.029712915, - -0.03771733, - 0.03198425, - -0.018850418, - 0.029596135, - -0.03073546, - 0.040810063, - 0.05748256, - 0.073663406, - -4.3307795E-4, - -0.012033559, - -0.04193751, - -0.025243256, - 0.0020644036, - -0.045018397, - -0.041560806, - 0.052930553, - 0.019955857, - -0.026577184, - -0.008272473, - 0.021633727, - -0.025493031, - -0.0703225, - -0.06678734, - 0.03229182, - 0.0071383226, - 0.034542687, - 0.059906006, - 0.053990763, - -0.03435307, - -0.013460787, - 0.0066855447, - 0.06581118, - 0.03435488, - -0.013016893, - -0.06384082, - 0.04292309, - -0.01003905, - 0.07465682, - 0.041681886, - -0.09872228, - -0.073181555, - 0.06117674, - -0.037698943, - -0.04354557, - 0.015390995, - 0.016960131, - -0.08594164, - -0.0031558785, - 0.053712446, - -0.022476645, - 0.049800374, - -0.091516644, - -0.054994736, - 0.0021578616, - 0.0319539, - -0.037861917, - -0.035363257, - 0.029294293, - -0.038181435, - -0.032684956, - -0.059862334, - -0.052932844, - -0.058168415, - -0.09271316, - -0.03091905, - 0.058375362, - 0.033076484, - -0.048589885, - -0.0471485, - -0.036419317, - 0.0197355, - -0.09041303, - 0.005987353, - -0.04762716, - -0.025347468, - 0.01992799, - -0.040301844, - 0.028963821, - 0.04351864, - -0.07274519, - -0.029667713, - 0.002675472, - -0.008265489, - 0.024745574, - 0.015290826, - 0.05244983, - -0.06499378, - 0.062229145, - 0.056773033, - -0.013647868, - 0.10126457, - 0.07742867, - 0.06907199, - 0.064441785, - -0.03506488, - -0.0027899756, - -0.043987043, - -0.049338706, - -0.06806032, - 0.025320068, - 0.07688298, - -0.037168447, - -0.015209554, - -0.04958993, - 0.029053042, - 0.078892104, - -0.05066037, - -0.030179376, - 0.047830258, - 0.05499768, - 0.04351645, - 0.052307993, - 0.044633888, - 0.020583658, - -0.033953577, - 0.095311515, - -0.0630289, - 0.007157878, - 0.038106248, - -0.035896186, - 0.082412794, - -0.029322542, - 0.09868366, - 0.055021353, - -0.0075476193, - 0.06234535, - -0.070212856, - 0.059051443, - -0.034478117, - -0.062892415, - 0.051439803, - 0.027673196, - -0.08141708, - 0.051184427, - 0.0028761302, - 0.016736014, - 0.05301783, - 0.070441864, - 0.034725133, - 0.07278133, - -0.034562826, - -0.08274096, - 0.04781931, - 0.067391045, - -0.028286146, - 0.045300007, - -0.070981935, - -0.0900906, - -0.01804769, - -0.07678485, - -0.054171197, - -0.04371682, - 0.044014435, - 0.019092314, - -0.0533041, - -0.05406611, - 0.001399687, - 0.008414226, - 0.0070721963, - -0.054595735, - -0.06279298, - 0.012740916, - -0.068271995, - -0.016297301, - -0.018569002, - 0.07028272, - -0.021509787, - -0.07611714, - 0.00775331, - 0.043958176, - -0.015166803, - 0.057754774, - -0.013378479, - -0.06428601, - 0.033813998, - -0.03535889, - -0.0053371727, - 0.022787765, - 0.0827088, - -0.12142623, - 0.0026697267, - 0.03981775, - -0.02158926, - 0.03722548, - -0.04657821, - -0.049696047, - 0.027757794, - 0.046377983, - -0.049581885, - -0.08924511, - 0.035119716, - 0.07465048, - 0.07677282, - 0.053386416, - -0.020686079, - 0.013271858, - 0.057107273, - -0.016681688, - 0.015427299, - 0.046444256, - -0.0758986, - 0.03103317, - 0.0036917871, - -0.07186075, - -0.0624062, - 0.043409187, - -0.054538824, - -0.065254256, - 0.05370674, - 0.03439175, - 0.02197341, - 0.025227817 - ] - }, - { - "start_offset": 8427, - "end_offset": 9687, - "embeddings": [ - 0.05744903, - -0.02452922, - -0.08476994, - -0.022428924, - 0.048399355, - -0.036132727, - -0.015275069, - 0.074007444, - 0.07940483, - 0.02248898, - 0.04316835, - -0.0034011744, - 0.08490044, - -4.1730207E-5, - -0.038465198, - 0.047819026, - 0.072968654, - -0.0597117, - 0.01257942, - -0.058731165, - 0.01321756, - 0.015429294, - -0.04443649, - 0.067764916, - 0.032255292, - 0.057302598, - -0.013705533, - -0.002871075, - -0.0017963633, - -0.076624624, - -0.04033174, - -0.03958768, - 0.021592977, - -0.083355255, - 0.02508422, - 0.014075689, - -0.042936496, - -0.069775715, - 0.053771127, - -0.06096773, - -0.038709678, - 0.030585166, - 0.06309865, - 0.0289272, - 0.070409566, - 0.075638674, - -0.039296776, - 0.02741248, - -0.041558262, - -0.009545241, - -0.060929116, - 0.017809264, - 0.04246089, - 0.092424795, - 0.0044749626, - -0.084972195, - -0.071833394, - -0.05189755, - -0.05925639, - 0.07651771, - 0.051788367, - -0.053483434, - -0.021773372, - -0.00506648, - 0.038404945, - 0.06250312, - 0.061945193, - 0.011288415, - -0.060019504, - -0.026446447, - -0.055844307, - 0.06780296, - -0.03332657, - -0.048795506, - 0.03756737, - 0.045220662, - 0.034406263, - -0.058406588, - 0.018282196, - -0.09083589, - -0.03040247, - -0.05790508, - -0.016188977, - 0.022804815, - -0.056110263, - 0.07543798, - 0.038187183, - -0.08649141, - 0.08623204, - -0.042687863, - 0.0573812, - 0.050730344, - -0.0433588, - -0.09344185, - -0.046142764, - -0.07739427, - -0.05609858, - 0.052337695, - 0.053889126, - -0.05016094, - 0.096083306, - 0.011468343, - 0.042769995, - 0.008584574, - -0.028934095, - 0.029772492, - 0.05292526, - -0.024594065, - 0.08542614, - -0.066132575, - -0.0076108603, - 0.0075524007, - 0.09586245, - 0.07127726, - -0.08062749, - -0.06285386, - -0.034123085, - 0.053412784, - 0.03723955, - -0.033416737, - 0.04680435, - -0.03861024, - -0.027420327, - -0.081069514, - -0.059449777, - -0.023693249, - 0.023154624, - 0.052628066, - 0.053673804, - 0.03851477, - 0.048254706, - 0.040450633, - 0.024582013, - 0.030465266, - 0.07089921, - 0.087507665, - -0.009536147, - -0.014239722, - 0.0023720453, - -0.03707558, - -0.025194108, - 0.08157714, - -0.03958548, - 0.051691998, - 0.06314976, - 0.02721075, - 0.052713513, - -0.023559293, - 0.06393838, - -0.07106552, - 0.044660386, - -0.025641244, - 0.06264186, - 0.014594412, - 0.048385747, - -0.055564065, - -0.06955722, - -0.088032804, - 0.034305904, - 0.045169048, - -0.03802287, - -0.013604237, - -0.08036378, - 0.022200659, - -0.055803996, - -0.084766835, - -0.03537992, - 0.0466811, - -0.01768934, - -0.04932191, - -0.028891142, - 0.0119931735, - -0.030645167, - 0.02563793, - 0.011760058, - 0.02289236, - -0.052902717, - 0.0097223595, - 0.042422734, - 0.020096473, - 0.0088921515, - -0.013737467, - -0.03993987, - -0.05381494, - -0.04218381, - -0.03449234, - -0.054990627, - 0.009642538, - 0.05949224, - -0.007698366, - 0.027766742, - 0.031446908, - -0.08122337, - -0.038493186, - -0.06830541, - 0.020205725, - -0.030477056, - 0.044251017, - 0.08096215, - 0.10125872, - -0.009518375, - -0.018208051, - 0.04083479, - 0.021746838, - 0.030360037, - 0.0030146895, - -0.04425533, - 0.063152075, - -0.040584363, - 0.07283654, - 0.062402766, - -0.072093405, - -0.07191966, - 0.041823577, - 0.004934987, - -0.037696403, - 0.032516938, - 0.072518826, - -0.06659665, - -0.006708449, - 0.07320258, - -0.028489655, - 0.0686214, - -0.07320168, - -0.03665047, - 0.020025352, - 0.018766245, - -0.025394067, - -0.043893065, - 0.013678436, - -0.0817917, - -0.02630837, - -0.03421568, - -0.0654703, - -0.042911462, - -0.07311668, - -0.0038604182, - 0.016762605, - 0.021780867, - -0.06629608, - -0.012976095, - -0.051092017, - 0.011383003, - -0.11568767, - 0.056158062, - -0.011376737, - 0.020621011, - 0.015717132, - -0.01347594, - 0.018848866, - 0.039923675, - -0.06502122, - -0.044894896, - -0.032492988, - -0.035042934, - 0.045391146, - 0.047973733, - 0.10662139, - -0.056172207, - 0.031413678, - 0.0125645455, - -0.003751948, - 0.07743928, - 0.084872924, - 0.047170028, - 0.046972826, - -0.00976389, - -0.032883007, - -0.054116864, - -0.027746534, - -0.08914457, - 0.007070583, - 0.04398771, - -0.0475649, - -0.06489332, - -0.060108416, - 0.0143431965, - 0.05955711, - -0.0774654, - -0.030995058, - 0.05263145, - 0.029864812, - 0.01608842, - 0.09080374, - 0.05185686, - -0.032855753, - 0.0063909087, - 0.0853062, - -0.10142854, - -0.07251046, - -0.005085327, - -0.066178784, - 0.046009053, - -0.09079122, - 0.08566233, - 0.06576406, - 0.017733688, - 0.06487284, - -0.039741356, - 0.04176326, - -0.010695733, - -0.050619148, - 0.01245912, - 0.03467508, - -0.06871932, - 0.030141022, - 0.026552299, - -0.0035028423, - 0.030276356, - 0.05361378, - 0.054491397, - 0.06513585, - -0.08491482, - -0.051875558, - 0.086067244, - 0.0396853, - -0.054731067, - 0.016796874, - -0.036002953, - -0.0658579, - -0.016491668, - -0.0758324, - -0.039184928, - -0.068875834, - 0.031522863, - 0.009083638, - -0.024529556, - -0.059996516, - 0.06894157, - -0.033383097, - -0.002836109, - -0.044933245, - -0.09211297, - 0.075231805, - -0.09029687, - -0.0025871666, - -0.02342682, - 0.06138579, - -0.052864984, - -0.078638926, - 0.020620788, - 0.011810836, - -0.014471281, - 0.01986825, - -0.040482074, - -0.06512211, - 0.062289387, - -0.03012425, - -4.7029057E-4, - 0.035347983, - 0.11894842, - -0.050478995, - -0.014397127, - 0.049630538, - -0.01540003, - 0.052197892, - -0.048483927, - -0.0076621673, - 0.04089758, - 0.015284395, - -0.023267174, - -0.0582655, - 0.035793785, - 0.06800681, - 0.11031594, - 0.10364201, - -0.042768136, - 0.03487297, - 0.03780645, - -0.040866226, - 0.046048936, - 0.029865082, - -0.04171421, - 0.03842289, - -0.0154759055, - -0.020621978, - -0.05873017, - 0.05175785, - -0.03108134, - -0.08132814, - 0.04200817, - 0.05092204, - 0.02828486, - 0.06530922 - ] - }, - { - "start_offset": 9554, - "end_offset": 10460, - "embeddings": [ - 0.06795254, - -0.027788855, - -0.06532636, - -0.05325019, - 0.05093753, - -0.055382267, - 0.051724233, - 0.03824768, - 0.07362302, - 0.04002248, - 0.0550001, - -4.6239374E-4, - 0.09122236, - 0.026436811, - -0.06672792, - 0.011183016, - 0.097761884, - -0.082106106, - -0.028012855, - -0.062181316, - -0.008753627, - -0.019600896, - -0.036626942, - 0.03447432, - 0.06013969, - 0.05223775, - -0.016101984, - -0.010203473, - 0.025855985, - -0.060056984, - -0.03937554, - -0.043756496, - 0.030997807, - -0.10395428, - 0.027634699, - 0.005324417, - -0.024001809, - -0.019012816, - 0.057175636, - -0.04684799, - -0.053544424, - 0.010870207, - 0.029463693, - 0.012833155, - 0.09024689, - 0.07413883, - -0.032765836, - 0.015112767, - -0.026345447, - -0.061428167, - -0.03721613, - 0.049783345, - -0.0010639617, - 0.09159631, - 0.02264281, - -0.055135295, - -0.05914746, - -0.052138176, - -0.07942767, - 0.029073795, - 0.02702419, - -0.0532197, - -0.014727404, - 6.2745955E-4, - 0.029936861, - 0.07468935, - 0.017335532, - 0.059831787, - -0.049344696, - 0.01880937, - -0.04200233, - 0.067229606, - -0.0012889965, - -0.0632363, - 0.043949638, - 0.049309365, - 0.036185548, - -0.058062393, - 0.04830483, - -0.02813847, - -0.06845039, - -0.046040177, - 0.0015298559, - 0.0377331, - -0.028661495, - 0.077781945, - 0.050341487, - -0.060820885, - 0.07588156, - -0.034051448, - 0.0768756, - 0.048993148, - -0.066278465, - -0.09077045, - -0.06511732, - -0.04687162, - -0.06367129, - 0.04149166, - 0.07077744, - -0.041742414, - 0.054795545, - -0.029036827, - 0.06274923, - 0.0061874306, - -0.0317641, - 0.0038963803, - 0.056757834, - -0.043710202, - 0.06571763, - -0.039196618, - 0.011638405, - -0.014480316, - 0.0782259, - 0.041573986, - -0.056704924, - -0.044138405, - -0.032456245, - 0.025932135, - 0.044077054, - -0.007750241, - 0.054932345, - -0.03292227, - -0.028155934, - -0.08390399, - -0.044745676, - -0.039294515, - 0.020936523, - 0.059180506, - 0.09919356, - 0.04366707, - 0.02902992, - 0.041400306, - 0.009594294, - -0.0058838082, - 0.042111326, - 0.12996204, - -0.038631447, - -0.019210441, - 0.056625124, - -0.057970613, - -0.08764153, - 0.08361837, - -0.016109295, - 0.030824538, - 0.061104048, - 0.05500983, - 0.045061268, - -0.055872414, - 0.08214088, - -0.046806127, - 0.057676565, - -0.055537637, - 0.072990045, - 0.045658644, - 0.06032115, - 0.0016026857, - -0.08040042, - -0.082738034, - 0.021192942, - 0.06619772, - -0.060728885, - -0.012204158, - -0.05736885, - 0.011759795, - -0.09559732, - -0.03487954, - -0.004853385, - 0.07568596, - 0.0170426, - -0.032266848, - -0.034448244, - 0.0015031097, - -0.051096436, - 0.067675546, - -0.008337999, - 0.02016469, - -0.034166988, - 1.3699784E-4, - 0.036702186, - 0.03628234, - -0.034941807, - 0.00841879, - -0.034299497, - -0.045383744, - -0.021920165, - -0.037155207, - -0.012305447, - -0.018064288, - 0.041540947, - -0.013256499, - -0.01824263, - 0.027535202, - -0.10648625, - -0.10162097, - -0.08293666, - 0.048940066, - -0.008739751, - 0.03177586, - 0.06314134, - 0.08493229, - -0.03178613, - -0.04156539, - 0.021325408, - 0.015963139, - 0.030367697, - 0.0012957318, - -0.054147527, - 0.049582303, - -0.058355026, - 0.059193954, - 0.080090895, - -0.0643068, - -0.078586616, - 0.061390623, - -0.035683163, - -0.05697204, - 0.016509915, - 0.0456678, - -0.06869852, - -0.021377739, - 0.0459535, - -0.034752127, - 0.044991855, - -0.07434412, - -0.061407465, - 0.0066419234, - 0.04971079, - -0.020545505, - -0.03348485, - 0.072175615, - -0.08419868, - -0.06409017, - -0.028774735, - -0.08295683, - -0.067340076, - -0.052633435, - -0.02234827, - -0.0048523103, - 0.036146127, - -0.041977044, - -0.04563754, - -0.056249525, - 0.040496923, - -0.07444201, - 0.042330384, - -0.034291748, - -0.037762616, - 0.035350475, - -0.023694497, - 0.044436026, - 0.034658603, - -0.071404554, - -0.052793555, - -0.030472925, - -0.023625389, - 0.043577187, - 0.05148583, - 0.09133436, - -0.054247066, - 0.04203479, - 0.030377146, - 0.0089587355, - 0.08783934, - 0.102596834, - 0.004045215, - 0.041863658, - -0.049759816, - -0.041472945, - -0.06560168, - -0.049456153, - -0.06286144, - -0.0021516692, - 0.06415723, - -0.057984285, - -0.052246314, - -0.0468379, - 0.005024449, - 0.063394494, - -0.049811874, - -0.007827677, - 0.043182477, - 0.03432028, - 0.059190553, - 0.051201522, - 0.06459717, - -0.0028205558, - 0.011427307, - 0.07478203, - -0.09011506, - -0.06896538, - 0.015105613, - -0.06902061, - 0.048208747, - -0.076154344, - 0.05893959, - 0.026351677, - -0.013113587, - 0.038620975, - -0.020734645, - 0.042907227, - -0.02616936, - -0.012401203, - 0.036476728, - 0.031379998, - -0.07657323, - 0.07557042, - 0.017815659, - 0.057302337, - 0.031211596, - 0.041240353, - 0.06864739, - 0.056433342, - -0.05830147, - -0.027380649, - 0.054324336, - 0.07243749, - -0.044019613, - 0.0029616277, - -0.061004672, - -0.06978305, - -0.055067733, - -0.06398177, - -0.025761655, - -0.031062664, - 0.038432557, - 0.01983404, - -0.022323918, - -0.08653916, - 0.036503706, - 0.027113369, - 0.051526625, - 0.003591905, - -0.043091606, - 0.048455648, - -0.06892166, - -0.007492171, - -0.018578587, - 0.05494636, - -0.05301073, - -0.094928786, - 0.003945227, - 0.033395912, - -0.034273494, - 0.06995625, - -0.024217183, - -0.06057119, - 0.022178173, - -0.048596364, - -0.03847148, - 0.01584574, - 0.08880866, - -0.09683496, - 0.040496774, - 0.0554991, - -0.0325551, - 0.066031836, - -0.07693793, - -0.014788223, - 0.013764252, - 0.04855808, - -0.037729017, - -0.037790805, - 0.033332434, - 0.09727558, - 0.09606235, - 0.07886385, - -0.017046498, - -0.0047775926, - 0.049902774, - -0.06325739, - 0.032437313, - 0.054471422, - -0.06110438, - 0.020669593, - 0.0070950463, - -0.026809083, - -0.05658399, - 0.048453655, - -0.048016146, - -0.047978207, - 0.046292298, - 0.046507128, - 0.022924135, - 0.07091171 - ] - }, - { - "start_offset": 10459, - "end_offset": 11696, - "embeddings": [ - 0.053117193, - -0.015585508, - -0.05423901, - -0.05138859, - 0.06471939, - -0.07901192, - 0.01693148, - 0.050192464, - 0.09575295, - 0.043414578, - 0.011923588, - -0.009796319, - 0.05132375, - -0.014788656, - -0.025382983, - 0.028342921, - 0.06872216, - -0.055240728, - -0.018316492, - -0.053359665, - 0.013118909, - -0.01603142, - -0.05637189, - 0.060144503, - 0.078957014, - 0.052481424, - -0.063893974, - 0.012951693, - 0.037284218, - -0.0989329, - -0.031654015, - -0.018283853, - 0.048968345, - -0.04817539, - 0.026837517, - 6.222096E-5, - -0.024189027, - 0.0112748975, - 0.0207289, - -0.012949756, - -0.03303762, - 0.04864192, - 0.028754367, - 0.025379542, - 0.047412705, - 0.051565237, - -0.057438288, - 0.032263443, - -0.06824788, - -0.019407846, - -0.059993997, - 0.06091319, - -0.0069746193, - 0.099794194, - -6.160557E-4, - -0.09514187, - -0.09679237, - -0.048262954, - -0.047733244, - 0.09921752, - 0.0332093, - -0.04550775, - -0.05143887, - 0.014637188, - 0.07129097, - 0.09510039, - 0.032291826, - 0.034658313, - -0.017751144, - -0.044415683, - -0.05508973, - 0.067787856, - -0.031251505, - -0.044856634, - -0.0033598887, - 0.04760263, - 0.054377872, - -0.040341455, - 0.018044798, - -0.023554679, - -0.049902994, - -0.031270802, - -0.007537713, - 0.03402409, - -0.027234826, - 0.09944215, - 0.045163024, - -0.05408758, - 0.04893289, - -0.048714437, - 0.061802126, - 0.06499505, - -0.07351746, - -0.08406793, - -0.08679661, - -0.051905084, - -0.034713045, - 0.040123433, - 0.07967649, - -0.06041734, - 0.061612148, - -0.020921662, - 0.05665623, - 0.041377034, - -0.007348656, - 0.015952924, - 0.05296665, - -0.052709162, - 0.071244985, - -0.038275376, - -0.01164368, - 0.014391718, - 0.06113161, - 0.034303535, - -0.050069015, - -0.070354894, - -0.011464902, - -0.028307518, - 0.04133397, - -0.049779277, - 0.08302818, - -0.048584647, - -0.06805662, - -0.04735593, - -0.04913521, - -0.005428242, - 0.03233016, - 0.044904802, - 0.06872594, - -0.01780296, - 0.06279163, - 0.039817583, - 0.007986946, - 0.0121078305, - 0.074653216, - 0.12367899, - -0.037977446, - -0.02724532, - 0.021000944, - -0.07356985, - -0.06435206, - 0.013165806, - -0.004956233, - 0.038697783, - 0.0691664, - 0.041731402, - 0.06331449, - -0.0046027564, - 0.078827925, - -0.028814215, - 0.02115456, - -0.030129815, - 0.071896814, - 0.005554397, - 0.060193166, - -0.016858125, - -0.067393966, - -0.06267468, - 0.019814175, - 0.045483287, - -0.093514144, - -0.026769608, - -0.091787525, - 0.019846648, - -0.092779495, - -0.04158812, - 0.01915316, - 0.03254872, - -9.048901E-4, - 0.014318523, - -0.013507805, - 0.040734824, - -0.047447592, - 0.052798737, - -0.04318385, - 0.020014195, - -0.05593343, - 0.045916248, - 0.059790693, - 0.032657895, - -0.038109995, - -0.006277211, - -0.042986337, - -0.090472385, - -0.003131573, - -0.0031761206, - -0.047034763, - 0.023323804, - 0.06837014, - -0.0039844033, - 0.002091333, - 0.021284567, - -0.052262813, - -0.082303464, - -0.052592263, - 0.032273255, - 0.009803746, - 0.0013473934, - 0.05583177, - 0.06429418, - 0.001517824, - -0.0072332155, - 0.020265736, - 0.0033951101, - 0.058746524, - 0.022550192, - -0.07356808, - 0.045694035, - -0.018434413, - 0.025329743, - 0.061233632, - -0.07960079, - -0.08406201, - 0.06330057, - -0.09599598, - -0.025174508, - 0.028187213, - 0.067423336, - -0.06526651, - 0.006772753, - 0.07572871, - -0.031995185, - 0.05916029, - -0.047238894, - -0.06869731, - 0.032157637, - 0.07589699, - -0.043760736, - -0.013728161, - 0.053143077, - -0.021147272, - -0.049572222, - -0.04899867, - -0.08653491, - -0.07939669, - -0.10523086, - -0.001059735, - 0.0063834176, - 0.015314108, - -0.05562784, - -0.056119584, - -0.08270628, - -3.2258706E-4, - -0.083571695, - 0.052503087, - -0.03977744, - -0.047228433, - 0.07893777, - -0.017052159, - 0.035709318, - 0.04999642, - -0.100927435, - -0.025071207, - -0.046351615, - -0.026675632, - 0.025651569, - 0.068944395, - 0.031405594, - -0.07291537, - 0.018124148, - 0.018039903, - -0.034970153, - 0.10088425, - 0.09433116, - 0.0689122, - 0.049935102, - -0.02560692, - -0.06034739, - -0.060196366, - -0.02504903, - -0.058731087, - 0.04172741, - 0.0038506123, - -0.035828065, - -0.018227967, - -0.07467086, - 0.037910078, - 0.054497574, - -0.07775332, - -0.017336372, - 0.046693277, - 0.022060173, - 0.036212, - 0.046006728, - 0.027395774, - -0.020391421, - -0.029180788, - 0.05312558, - -0.072161354, - 0.016918298, - -0.025024151, - -0.031315047, - 0.08490075, - -0.03845013, - 0.047562983, - 0.021411635, - -0.023305604, - 0.039255943, - -0.026875794, - 0.08610026, - -0.029386222, - -0.016845187, - 0.054429937, - 0.027040144, - -0.06772479, - 0.095606916, - -0.036488708, - 7.7485084E-4, - 0.037060957, - 0.07791183, - 0.07910346, - 0.013702623, - -0.03475561, - -0.040810455, - 0.0774657, - 0.043717105, - -0.05542658, - 0.0030442073, - -0.1050271, - -0.07705069, - -0.029897174, - -0.03622423, - -0.044971265, - -0.06206865, - 0.019566234, - 0.023725986, - 0.010738356, - -0.07149888, - 0.002263669, - 0.023846326, - 0.037898906, - 0.008864181, - -0.03436943, - 0.03523395, - -0.061920922, - -0.022051072, - -0.07035821, - 0.09721548, - -0.047868855, - -0.09084715, - 0.01050229, - 0.06422868, - 0.02094103, - 0.038811173, - -0.023608131, - -0.04335279, - 0.053133078, - -0.021861738, - -0.039793, - 0.048549335, - 0.07316228, - -0.08636803, - 0.017843066, - 0.06287863, - -0.034799643, - 0.06658666, - -0.042144388, - -0.025062915, - -0.005463377, - 0.024971562, - -0.05923357, - -0.041639276, - 0.039569613, - 0.06571587, - 0.096652776, - 0.061983064, - -0.036815662, - 0.0028833281, - 0.05262061, - -0.05568962, - 0.05730981, - 0.08141181, - -0.030994864, - 0.020174727, - -0.06336232, - -0.012292672, - -0.02354779, - 0.03636813, - -0.062137593, - -0.06593778, - 0.008968277, - 0.08741745, - -0.0025689485, - 0.043467455 - ] - }, - { - "start_offset": 11635, - "end_offset": 12404, - "embeddings": [ - 0.060443457, - -0.018814357, - -0.073390484, - -0.072757736, - 0.07602336, - -0.08031318, - 0.049980927, - 0.09587944, - 0.051309362, - 0.06949769, - -0.0072211474, - -0.015413545, - 0.07187972, - -0.027608033, - -0.050633453, - 0.0033393581, - 0.08592932, - -0.05384897, - 0.0090165865, - -0.067287035, - 0.047661647, - -0.034592163, - -0.039346103, - 0.0146116605, - 0.071183585, - 0.08697948, - -0.024614712, - 0.028339177, - 0.022019284, - -0.09268575, - -0.019253781, - -0.041030932, - 0.007305104, - -0.07492374, - 0.053613797, - 0.01886426, - -0.04425684, - -0.024854647, - 0.031085161, - -0.017220812, - -0.020209908, - 0.05369729, - 0.03976705, - 0.029386787, - 0.050238505, - 0.054753933, - -0.06296793, - -0.0058290027, - -0.03582435, - -0.017357286, - -0.02952249, - 0.08404001, - 0.005996583, - 0.07228626, - 0.0453729, - -0.100141585, - -0.092281535, - -0.04168767, - -0.10046059, - 0.1075754, - 0.024743102, - -0.056973584, - -0.035330076, - 0.011421968, - 0.030275127, - 0.09172467, - 0.017434414, - 0.015847225, - -0.0726862, - -0.06845117, - -0.055281464, - 0.067980886, - -3.857295E-4, - -0.054992713, - 0.0075252233, - 0.047023434, - 0.035014, - -0.012921049, - 0.02088017, - -0.058113724, - -0.025709266, - -0.054443315, - -0.019515503, - 0.064740464, - -0.05698313, - 0.09463141, - 0.04497404, - -0.049769837, - 0.0833754, - -0.029804397, - 0.048232727, - 0.06960264, - -0.0549942, - -0.052367542, - -0.053988345, - -0.043551333, - -0.0440573, - 0.0228508, - 0.06116432, - -0.034144748, - 0.07046748, - 0.030397533, - 0.08092524, - 0.01595668, - -0.040372074, - 0.005287498, - 0.08518292, - -0.085493654, - 0.07491553, - -0.057637572, - -0.052299142, - 0.025728408, - 0.061017167, - 0.062338777, - -0.025426613, - -0.046602402, - -0.00770177, - -0.022468466, - 0.04037256, - -0.027729545, - 0.049696933, - -0.04159955, - -0.03250282, - -0.05583671, - -0.057482447, - -0.013210838, - 0.010765793, - -6.568303E-4, - 0.01951912, - 0.0042298064, - 0.06481922, - 0.051263362, - 0.03024384, - 0.0143968, - 0.057499222, - 0.09256815, - -0.057120778, - 0.0056950618, - 0.009500937, - -0.085512474, - -0.062135834, - 0.03066087, - -0.022205362, - 0.04599781, - 0.03531616, - 0.033788696, - 0.092292726, - -0.010158623, - 0.080052234, - -0.0060746367, - 0.05273896, - -0.017564675, - 0.057575084, - -0.005175612, - 0.030423889, - -0.04613064, - -0.067384765, - -0.0777474, - 0.050731033, - 0.06055307, - -0.057881925, - -0.05091726, - -0.09201947, - 0.004303206, - -0.055900693, - -0.0481762, - 0.016243042, - 0.027040469, - -0.0034547276, - -0.049395755, - -0.011644979, - 0.080957845, - -0.058048993, - 0.0492391, - -0.0063328324, - -2.4730185E-4, - -0.07200027, - 0.09804746, - 0.048087306, - 0.048413623, - -0.043330252, - -0.008462916, - -0.046491988, - -0.070683904, - -0.04057368, - 0.0125348065, - -0.059007447, - 0.038095772, - 0.041568384, - -0.024388209, - -0.013926745, - -0.009171631, - -0.05361981, - -0.04222372, - -0.02127755, - 0.051767804, - 0.0061274986, - 0.053409755, - 0.035079833, - 0.027605304, - -0.013933335, - -0.031408813, - 0.005120374, - 0.020053213, - 0.039294656, - -0.005544306, - -0.10680002, - 0.034485042, - -0.02083935, - 0.04792578, - 0.068743885, - -0.11507496, - -0.10216752, - 0.056888673, - -0.06800507, - -0.059926618, - 0.008200659, - 0.030621173, - -0.059572708, - 0.020859051, - 0.11224187, - -0.026253646, - 0.05764227, - -0.047062173, - -0.056426648, - -0.018189454, - 0.06514884, - -0.060741644, - -0.039313216, - 0.011433455, - -0.038083345, - -0.05282726, - -0.052797362, - -0.08434047, - -0.06285792, - -0.058861967, - -0.059305865, - 0.004766285, - 0.06191272, - -0.061296433, - -0.05848144, - -0.038482025, - 0.033259515, - -0.11248364, - 0.017305091, - -0.024461089, - -0.03555484, - 0.0663307, - -0.014705792, - 0.014617273, - 0.04280535, - -0.074889824, - -0.052947134, - -0.030037379, - -0.0077148285, - 0.057981927, - 0.047073305, - 0.06093273, - -0.05974137, - 0.064445026, - 0.029471356, - 9.505361E-4, - 0.08606595, - 0.086340785, - 0.046603594, - 0.05858932, - -0.032077473, - -0.019977393, - -0.03431287, - -0.047011334, - -0.056820385, - 0.013462027, - 0.042805202, - -0.0677109, - -0.023264, - -0.05485633, - 0.055690948, - 0.05651245, - -0.071241796, - -0.00915478, - 0.053730387, - 0.056755595, - 0.03846687, - 0.05310068, - 0.0056861867, - 0.005360462, - -0.039538994, - 0.03497575, - -0.06781882, - -0.011839252, - -0.042778153, - -0.041556057, - 0.034042798, - -0.04711682, - 0.034821853, - 0.043264013, - 0.01696988, - 0.048099734, - -0.019220253, - 0.11707801, - -0.054026723, - -0.018543418, - 0.03708661, - 0.032689948, - -0.05068468, - 0.0691382, - -8.202863E-4, - -0.0019999356, - 0.014463376, - 0.07405446, - 0.07647231, - 0.011515665, - -0.034625884, - -0.055066366, - 0.058186855, - 0.024419852, - -0.031584322, - -0.029543389, - -0.07736987, - -0.07829203, - -0.0053546038, - -0.06570245, - -0.010904349, - -0.06490494, - 0.03149016, - 0.009859199, - -0.052703284, - -0.06884288, - 0.02139801, - 0.026021063, - 0.07129519, - -0.022009108, - -0.041722506, - 0.07192836, - -0.01098611, - -0.0027591465, - -0.04109422, - 0.046898007, - -0.036396515, - -0.090931825, - 0.022607114, - 0.030457368, - -0.03448933, - 0.021621844, - -0.048675153, - -0.033822104, - 0.057124067, - -0.048874307, - -0.054428495, - 0.056644462, - 0.08236624, - -0.08848753, - 0.033674665, - 0.053105284, - -0.019323535, - 0.05071626, - -0.025313161, - 0.0112252515, - 0.018250609, - 0.07660853, - -0.054870043, - -0.07221262, - 0.018527014, - 0.073324844, - 0.061312377, - 0.052779585, - -0.017265014, - 0.0044537727, - 0.054682422, - -0.055878393, - 0.064048916, - 0.03346393, - -0.028677322, - 0.050278228, - -0.02905618, - -0.038899165, - -0.03864768, - 0.042319845, - -0.06788429, - -0.06178736, - 0.06868577, - 0.05830974, - 0.02179902, - 0.052530885 - ] - } - ] - } - } - } - }, - "field": "CHAPTER 1\n\n\n\nLOOMINGS\n\n\n\nCall me Ishmael. Some years ago--never mind how long precisely--having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. It is a way I have of driving off the spleen, and regulating the circulation. Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul; whenever I find myself involuntarily pausing before coffin warehouses, and bringing up the rear of every funeral I meet; and especially whenever my hypos get such an upper hand of me, that it requires a strong moral principle to prevent me from deliberately stepping into the street, and methodically knocking people's hats off--then, I account it high time to get to sea as soon as I can. This is my substitute for pistol and ball. With a philosophical flourish Cato throws himself upon his sword; I quietly take to the ship. There is nothing surprising in this. If they but knew it, almost all men in their degree, some time or other, cherish very nearly the same feelings towards the ocean with me.\n\nThere now is your insular city of the Manhattoes, belted round by wharves as Indian isles by coral reefs--commerce surrounds it with her surf. Right and left, the streets take you waterward. Its extreme down-town is the battery, where that noble mole is washed by waves, and cooled by breezes, which a few hours previous were out of sight of land. Look at the crowds of water-gazers there.\n\nCircumambulate the city of a dreamy Sabbath afternoon. Go from Corlears Hook to Coenties Slip, and from thence, by Whitehall, northward. What do you see?--Posted like silent sentinels all around the town, stand thousands upon thousands of mortal men fixed in ocean reveries. Some leaning against the spiles; some seated upon the pier-heads; some looking over the bulwarks of ships from China; some high aloft in the rigging, as if striving to get a still better seaward peep. But these are all landsmen; of week days pent up in lath and plaster--tied to counters, nailed to benches, clinched to desks. How then is this? Are the green fields gone? What do they here?\n\nBut look! here come more crowds, pacing straight for the water, and seemingly bound for a dive. Strange! Nothing will content them but the extremest limit of the land; loitering under the shady lee of yonder warehouses will not suffice. No. They must get just as nigh the water as they possibly can without falling in. And there they stand--miles of them--leagues. Inlanders all, they come from lanes and alleys, streets and avenues--north, east, south, and west. Yet here they all unite. Tell me, does the magnetic virtue of the needles of the compasses of all those ships attract them thither?\n\nOnce more. Say, you are in the country; in some high land of lakes. Take almost any path you please, and ten to one it carries you down in a dale, and leaves you there by a pool in the stream. There is magic in it. Let the most absentminded of men be plunged in his deepest reveries--stand that man on his legs, set his feet a-going, and he will infallibly lead you to water, if water there be in all that region. Should you ever be athirst in the great American desert, try this experiment, if your caravan happen to be supplied with a metaphysical professor. Yes, as every one knows, meditation and water are wedded for ever.\n\nBut here is an artist. He desires to paint you the dreamiest, shadiest, quietest, most enchanting bit of romantic landscape in all the valley of the Saco. What is the chief element he employs? There stand his trees, each with a hollow trunk, as if a hermit and a crucifix were within; and here sleeps his meadow, and there sleep his cattle; and up from yonder cottage goes a sleepy smoke. Deep into distant woodlands winds a mazy way, reaching to overlapping spurs of mountains bathed in their hill-side blue. But though the picture lies thus tranced, and though this pine-tree shakes down its sighs like leaves upon this shepherd's head, yet all were vain, unless the shepherd's eye were fixed upon the magic stream before him. Go visit the Prairies in June, when for scores on scores of miles you wade knee-deep among Tiger-lilies--what is the one charm wanting?--Water--there is not a drop of water there! Were Niagara but a cataract of sand, would you travel your thousand miles to see it? Why did the poor poet of Tennessee, upon suddenly receiving two handfuls of silver, deliberate whether to buy him a coat, which he sadly needed, or invest his money in a pedestrian trip to Rockaway Beach? Why is almost every robust healthy boy with a robust healthy soul in him, at some time or other crazy to go to sea? Why upon your first voyage as a passenger, did you yourself feel such a mystical vibration, when first told that you and your ship were now out of sight of land? Why did the old Persians hold the sea holy? Why did the Greeks give it a separate deity, and own brother of Jove? Surely all this is not without meaning. And still deeper the meaning of that story of Narcissus, who because he could not grasp the tormenting, mild image he saw in the fountain, plunged into it and was drowned. But that same image, we ourselves see in all rivers and oceans. It is the image of the ungraspable phantom of life; and this is the key to it all.\n\nNow, when I say that I am in the habit of going to sea whenever I begin to grow hazy about the eyes, and begin to be over conscious of my lungs, I do not mean to have it inferred that I ever go to sea as a passenger. For to go as a passenger you must needs have a purse, and a purse is but a rag unless you have something in it. Besides, passengers get sea-sick--grow quarrelsome--don't sleep of nights--do not enjoy themselves much, as a general thing;--no, I never go as a passenger; nor, though I am something of a salt, do I ever go to sea as a Commodore, or a Captain, or a Cook. I abandon the glory and distinction of such offices to those who like them. For my part, I abominate all honorable respectable toils, trials, and tribulations of every kind whatsoever. It is quite as much as I can do to take care of myself, without taking care of ships, barques, brigs, schooners, and what not. And as for going as cook,--though I confess there is considerable glory in that, a cook being a sort of officer on ship-board--yet, somehow, I never fancied broiling fowls;--though once broiled, judiciously buttered, and judgmatically salted and peppered, there is no one who will speak more respectfully, not to say reverentially, of a broiled fowl than I will. It is out of the idolatrous dotings of the old Egyptians upon broiled ibis and roasted river horse, that you see the mummies of those creatures in their huge bake-houses the pyramids.\n\nNo, when I go to sea, I go as a simple sailor, right before the mast, plumb down into the forecastle, aloft there to the royal mast-head. True, they rather order me about some, and make me jump from spar to spar, like a grasshopper in a May meadow. And at first, this sort of thing is unpleasant enough. It touches one's sense of honor, particularly if you come of an old established family in the land, the Van Rensselaers, or Randolphs, or Hardicanutes. And more than all, if just previous to putting your hand into the tar-pot, you have been lording it as a country schoolmaster, making the tallest boys stand in awe of you. The transition is a keen one, I assure you, from a schoolmaster to a sailor, and requires a strong decoction of Seneca and the Stoics to enable you to grin and bear it. But even this wears off in time.\n\nWhat of it, if some old hunks of a sea-captain orders me to get a broom and sweep down the decks? What does that indignity amount to, weighed, I mean, in the scales of the New Testament? Do you think the archangel Gabriel thinks anything the less of me, because I promptly and respectfully obey that old hunks in that particular instance? Who ain't a slave? Tell me that. Well, then, however the old sea-captains may order me about--however they may thump and punch me about, I have the satisfaction of knowing that it is all right; that everybody else is one way or other served in much the same way--either in a physical or metaphysical point of view, that is; and so the universal thump is passed round, and all hands should rub each other's shoulder-blades, and be content.\n\nAgain, I always go to sea as a sailor, because they make a point of paying me for my trouble, whereas they never pay passengers a single penny that I ever heard of. On the contrary, passengers themselves must pay. And there is all the difference in the world between paying and being paid. The act of paying is perhaps the most uncomfortable infliction that the two orchard thieves entailed upon us. But being paid,--what will compare with it? The urbane activity with which a man receives money is really marvellous, considering that we so earnestly believe money to be the root of all earthly ills, and that on no account can a monied man enter heaven. Ah! how cheerfully we consign ourselves to perdition!\n\nFinally, I always go to sea as a sailor, because of the wholesome exercise and pure air of the forecastle deck. For as in this world, head winds are far more prevalent than winds from astern (that is, if you never violate the Pythagorean maxim), so for the most part the Commodore on the quarter-deck gets his atmosphere at second hand from the sailors on the forecastle. He thinks he breathes it first; but not so. In much the same way do the commonalty lead their leaders in many other things, at the same time that the leaders little suspect it. But wherefore it was that after having repeatedly smelt the sea as a merchant sailor, I should now take it into my head to go on a whaling voyage; this the invisible police officer of the Fates, who has the constant surveillance of me, and secretly dogs me, and influences me in some unaccountable way--he can better answer than any one else. And, doubtless, my going on this whaling voyage, formed part of the grand programme of Providence that was drawn up a long time ago. It came in as a sort of brief interlude and solo between more extensive performances. I take it that this part of the bill must have run something like this:\n\n\n\n\"grand contested election for the presidency of the united states.\n\n\"whaling voyage by one ishmael.\n\n\"bloody battle in afghanistan.\"\n\n\n\nThough I cannot tell why it was exactly that those stage managers, the Fates, put me down for this shabby part of a whaling voyage, when others were set down for magnificent parts in high tragedies, and short and easy parts in genteel comedies, and jolly parts in farces--though I cannot tell why this was exactly; yet, now that I recall all the circumstances, I think I can see a little into the springs and motives which being cunningly presented to me under various disguises, induced me to set about performing the part I did, besides cajoling me into the delusion that it was a choice resulting from my own unbiased freewill and discriminating judgment.\n\nChief among these motives was the overwhelming idea of the great whale himself. Such a portentous and mysterious monster roused all my curiosity. Then the wild and distant seas where he rolled his island bulk; the undeliverable, nameless perils of the whale; these, with all the attending marvels of a thousand Patagonian sights and sounds, helped to sway me to my wish. With other men, perhaps, such things would not have been inducements; but as for me, I am tormented with an everlasting itch for things remote. I love to sail forbidden seas, and land on barbarous coasts. Not ignoring what is good, I am quick to perceive a horror, and could still be social with it--would they let me--since it is but well to be on friendly terms with all the inmates of the place one lodges in.\n\nBy reason of these things, then, the whaling voyage was welcome; the great flood-gates of the wonder-world swung open, and in the wild conceits that swayed me to my purpose, two and two there floated into my inmost soul, endless processions of the whale, and, mid most of them all, one grand hooded phantom, like a snow hill in the air.\n\nCopyright © 1967 by Bantam Books\n\nPublisher\nModern Library\n\nCategories\nClassic Fiction\nLiterary Fiction\nFiction\nClassics\n\n\nAbout Moby-Dick\n\nAbout Herman Melville" -} \ No newline at end of file diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml index f4464f64b141c..882f1df03e926 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml @@ -18,15 +18,6 @@ setup: --- "Indexes sparse vector document": - - requires: - test_runner_features: [capabilities] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [inference_metadata_fields] # Checks mapping is not updated until first doc arrives - do: @@ -74,17 +65,10 @@ setup: --- "Field caps with sparse embedding": + - requires: cluster_features: "gte_v8.16.0" reason: field_caps support for semantic_text added in 8.16.0 - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: field_caps: @@ -157,15 +141,6 @@ setup: --- "Indexes dense vector document": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] # Checks mapping is not updated until first doc arrives - do: @@ -208,17 +183,10 @@ setup: --- "Field caps with text embedding": + - requires: cluster_features: "gte_v8.16.0" reason: field_caps support for semantic_text added in 8.16.0 - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: field_caps: diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapping_incompatible_field_mapping.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapping_incompatible_field_mapping.yml index ace30a6c2727c..3d46c3b23d7e3 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapping_incompatible_field_mapping.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapping_incompatible_field_mapping.yml @@ -59,15 +59,6 @@ setup: --- "Fails for non-compatible dimensions": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: catch: /Incompatible model settings for field \[dense_field\].+/ @@ -90,15 +81,6 @@ setup: --- "Fails for non-compatible inference id": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: catch: /The configured inference_id \[a-different-inference-id\] for field \[dense_field\] doesn't match the inference_id \[dense-inference-id\].+/ @@ -121,15 +103,6 @@ setup: --- "Fails for non-compatible similarity": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: catch: /Incompatible model settings for field \[dense_field\].+/ @@ -152,15 +125,6 @@ setup: --- "Fails for non-compatible element type": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: catch: /Incompatible model settings for field \[dense_field\].+/ @@ -183,15 +147,6 @@ setup: --- "Fails for non-compatible task type for dense vectors": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: catch: /Incompatible model settings for field \[dense_field\].+/ @@ -215,15 +170,6 @@ setup: --- "Fails for non-compatible task type for sparse vectors": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: catch: /Incompatible model settings for field \[sparse_field\].+/ @@ -246,15 +192,6 @@ setup: --- "Fails for missing dense vector inference results in chunks": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: catch: /failed to parse field \[dense_field\] of type \[semantic_text\]/ @@ -276,15 +213,6 @@ setup: --- "Fails for missing sparse vector inference results in chunks": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: catch: /failed to parse field \[sparse_field\] of type \[semantic_text\]/ @@ -303,15 +231,6 @@ setup: --- "Fails for missing text in chunks": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: catch: /failed to parse field \[dense_field\] of type \[semantic_text\]/ diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml index d4bc2fb122a06..534e4831c4a0a 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml @@ -52,16 +52,6 @@ setup: --- "Calculates sparse embedding and text embedding results for new documents": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: index: index: test-index @@ -86,16 +76,6 @@ setup: --- "Calculates sparse embedding and text embedding results for new documents with integer value": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: index: index: test-index @@ -118,16 +98,6 @@ setup: --- "Calculates sparse embedding and text embedding results for new documents with boolean value": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: index: index: test-index @@ -150,16 +120,6 @@ setup: --- "Calculates sparse embedding and text embedding results for new documents with a collection of mixed data types": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: index: index: test-index @@ -223,16 +183,6 @@ setup: --- "Sparse vector results are indexed as nested chunks and searchable": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: bulk: index: test-index @@ -292,16 +242,6 @@ setup: --- "Dense vector results are indexed as nested chunks and searchable": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: bulk: index: test-index @@ -364,16 +304,6 @@ setup: --- "Reindex works for semantic_text fields": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: index: index: test-index @@ -465,16 +395,6 @@ setup: --- "semantic_text copy_to calculates embeddings for source fields": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: indices.create: index: test-copy-to-index @@ -516,16 +436,6 @@ setup: --- "Calculates embeddings for bulk operations - index": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: bulk: body: @@ -550,15 +460,6 @@ setup: --- "Update by query picks up new semantic_text fields": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: indices.create: @@ -616,15 +517,6 @@ setup: --- "Update by query works for scripts": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: index: @@ -666,13 +558,6 @@ setup: path: /_inference capabilities: [default_elser_2] - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: indices.create: index: test-elser-2-default-index @@ -703,14 +588,6 @@ setup: - requires: cluster_features: "semantic_text.in_object_field_fix" reason: object field fix added in 8.16.0 & 8.15.4 - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: indices.create: @@ -753,14 +630,6 @@ setup: - requires: cluster_features: semantic_text.delete_fix reason: Delete operations are properly applied when subsequent operations include a semantic text field. - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: bulk: diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml index 0a5a5b37dad55..3d3790d879ef1 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml @@ -93,16 +93,8 @@ setup: --- "Query using a sparse embedding model": - - requires: - test_runner_features: [ capabilities ] - - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: index: @@ -132,16 +124,8 @@ setup: --- "Numeric query using a sparse embedding model": - - requires: - test_runner_features: [ capabilities ] - - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: index: @@ -166,16 +150,8 @@ setup: --- "Boolean query using a sparse embedding model": - - requires: - test_runner_features: [ capabilities ] - - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: index: @@ -203,15 +179,9 @@ setup: - requires: cluster_features: "semantic_text.search_inference_id" reason: search_inference_id introduced in 8.16.0 - test_runner_features: [ capabilities ] - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: indices.put_mapping: @@ -251,16 +221,8 @@ setup: --- "Query using a dense embedding model": - - requires: - test_runner_features: [ capabilities ] - - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: index: @@ -290,16 +252,8 @@ setup: --- "Numeric query using a dense embedding model": - - requires: - test_runner_features: [ capabilities ] - - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: index: @@ -324,16 +278,8 @@ setup: --- "Boolean query using a dense embedding model": - - requires: - test_runner_features: [ capabilities ] - - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: index: @@ -358,16 +304,8 @@ setup: --- "Query using a dense embedding model that uses byte embeddings": - - requires: - test_runner_features: [ capabilities ] - - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: inference.put: @@ -430,15 +368,9 @@ setup: - requires: cluster_features: "semantic_text.search_inference_id" reason: search_inference_id introduced in 8.16.0 - test_runner_features: [ capabilities ] - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: indices.put_mapping: @@ -478,16 +410,8 @@ setup: --- "Apply boost and query name": - - requires: - test_runner_features: [ capabilities ] - - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: index: @@ -520,16 +444,8 @@ setup: --- "Query an index alias": - - requires: - test_runner_features: [ capabilities ] - - skip: features: [ "headers", "close_to" ] - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: index: diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml index fb664dda59d8e..294761608ee81 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml @@ -52,16 +52,6 @@ setup: --- "Updating non semantic_text fields does not recalculate embeddings": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: index: index: test-index @@ -102,16 +92,6 @@ setup: --- "Updating semantic_text fields recalculates embeddings": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: index: index: test-index @@ -190,16 +170,6 @@ setup: --- "Update logic handles source fields in object fields": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: indices.create: index: test-copy-to-index @@ -299,16 +269,6 @@ setup: --- "Updates fail when the updated value is invalid": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: index: index: test-index @@ -353,16 +313,6 @@ setup: --- "Partial updates work when using the update API": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: indices.create: index: test-copy-to-index @@ -427,16 +377,6 @@ setup: --- "Partial updates work when using the update API and the semantic_text field's original value is null": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: indices.create: index: test-copy-to-index @@ -502,16 +442,6 @@ setup: --- "Updates with script are not allowed": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: bulk: index: test-index @@ -545,16 +475,6 @@ setup: --- "semantic_text copy_to needs values for every source field for bulk updates": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: indices.create: index: test-copy-to-index @@ -592,16 +512,6 @@ setup: --- "Calculates embeddings for bulk operations - update": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - - do: bulk: body: @@ -646,16 +556,6 @@ setup: --- "Calculates embeddings for bulk operations - upsert": - - requires: - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - # Initial update fails - do: bulk: @@ -716,14 +616,6 @@ setup: - requires: cluster_features: semantic_text.single_field_update_fix reason: Standalone semantic text fields are now optional in a bulk update operation - test_runner_features: [ capabilities ] - - - skip: - reason: Test targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] # Update as upsert - do: diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml index 9e2bd8fefd15a..25cd1b5aec48a 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml @@ -2,15 +2,6 @@ setup: - requires: cluster_features: "semantic_text.highlighter" reason: a new highlighter for semantic text field - test_runner_features: [ capabilities ] - - # TODO: Remove once highlighter supports inference metadata fields - - skip: - reason: Test suite targets semantic text without inference metadata fields - capabilities: - - method: GET - path: /_inference - capabilities: [ inference_metadata_fields ] - do: inference.put: diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_unsupported_types.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_unsupported_types.yml index e100f30717aef..049895bc9f31a 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_unsupported_types.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_unsupported_types.yml @@ -504,3 +504,52 @@ double nested declared in mapping: # The `nested` field is not visible, nor are any of it's subfields. - match: { columns: [{name: name, type: keyword}] } + +--- +semantic_text declared in mapping: + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: POST + path: /_query + parameters: [ ] + capabilities: [ semantic_text_type ] + reason: "support for semantic_text type" + - do: + indices.create: + index: test_semantic_text + body: + settings: + number_of_shards: 5 + mappings: + properties: + semantic_text_field: + type: semantic_text + inference_id: my_inference_id + - do: + bulk: + index: test_semantic_text + refresh: true + body: + - { "index": { } } + - { + "semantic_text_field": { + "text": "be excellent to each other", + "inference": { + "inference_id": "my_inference_id", + "model_settings": { + "task_type": "sparse_embedding" + }, + "chunks": [{ "text": "be excellent to each other", "embeddings": { "a": 1,"b": 2 } }] + } + } + } + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM test_semantic_text' + - match: { columns: [{name: semantic_text_field, type: semantic_text}] } + - length: { values: 1 } + - match: { values.0: ["be excellent to each other"] }