From 8350ff29ba18c7d03d652b107532415705426da9 Mon Sep 17 00:00:00 2001 From: John Verwolf Date: Thu, 28 Nov 2024 13:25:02 -0800 Subject: [PATCH] Extensible Completion Postings Formats (#111494) Allows the Completion Postings Format to be extensible by providing an implementation of the CompletionsPostingsFormatExtension SPIs. --- docs/changelog/111494.yaml | 5 ++++ server/src/main/java/module-info.java | 6 +++- .../index/codec/PerFieldFormatSupplier.java | 24 ++++++++++++++-- .../index/mapper/CompletionFieldMapper.java | 5 ---- .../index/mapper/MappingLookup.java | 17 ----------- .../CompletionsPostingsFormatExtension.java | 28 +++++++++++++++++++ 6 files changed, 59 insertions(+), 26 deletions(-) create mode 100644 docs/changelog/111494.yaml create mode 100644 server/src/main/java/org/elasticsearch/internal/CompletionsPostingsFormatExtension.java diff --git a/docs/changelog/111494.yaml b/docs/changelog/111494.yaml new file mode 100644 index 0000000000000..6c7b84bb04798 --- /dev/null +++ b/docs/changelog/111494.yaml @@ -0,0 +1,5 @@ +pr: 111494 +summary: Extensible Completion Postings Formats +area: "Suggesters" +type: enhancement +issues: [] diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 63dbac3a72487..d572d3b90fec8 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -7,6 +7,7 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ +import org.elasticsearch.internal.CompletionsPostingsFormatExtension; import org.elasticsearch.plugins.internal.RestExtension; /** The Elasticsearch Server Module. */ @@ -288,7 +289,8 @@ to org.elasticsearch.serverless.version, org.elasticsearch.serverless.buildinfo, - org.elasticsearch.serverless.constants; + org.elasticsearch.serverless.constants, + org.elasticsearch.serverless.codec; exports org.elasticsearch.lucene.analysis.miscellaneous; exports org.elasticsearch.lucene.grouping; exports org.elasticsearch.lucene.queries; @@ -395,6 +397,7 @@ org.elasticsearch.stateless, org.elasticsearch.settings.secure, org.elasticsearch.serverless.constants, + org.elasticsearch.serverless.codec, org.elasticsearch.serverless.apifiltering, org.elasticsearch.internal.security; @@ -414,6 +417,7 @@ uses org.elasticsearch.node.internal.TerminationHandlerProvider; uses org.elasticsearch.internal.VersionExtension; uses org.elasticsearch.internal.BuildExtension; + uses CompletionsPostingsFormatExtension; uses org.elasticsearch.features.FeatureSpecification; uses org.elasticsearch.plugins.internal.LoggingDataProvider; diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java index 9c2a08a69002c..4d3d37ab4f3af 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java @@ -20,10 +20,15 @@ import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat; import org.elasticsearch.index.codec.postings.ES812PostingsFormat; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat; +import org.elasticsearch.index.mapper.CompletionFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.internal.CompletionsPostingsFormatExtension; +import org.elasticsearch.plugins.ExtensionLoader; + +import java.util.ServiceLoader; /** * Class that encapsulates the logic of figuring out the most appropriate file format for a given field, across postings, doc values and @@ -53,15 +58,28 @@ public PostingsFormat getPostingsFormatForField(String field) { private PostingsFormat internalGetPostingsFormatForField(String field) { if (mapperService != null) { - final PostingsFormat format = mapperService.mappingLookup().getPostingsFormat(field); - if (format != null) { - return format; + Mapper mapper = mapperService.mappingLookup().getMapper(field); + if (mapper instanceof CompletionFieldMapper) { + return PostingsFormatHolder.POSTINGS_FORMAT; } } // return our own posting format using PFOR return es812PostingsFormat; } + private static class PostingsFormatHolder { + private static final PostingsFormat POSTINGS_FORMAT = getPostingsFormat(); + + private static PostingsFormat getPostingsFormat() { + String defaultName = "Completion912"; // Caution: changing this name will result in exceptions if a field is created during a + // rolling upgrade and the new codec (specified by the name) is not available on all nodes in the cluster. + String codecName = ExtensionLoader.loadSingleton(ServiceLoader.load(CompletionsPostingsFormatExtension.class)) + .map(CompletionsPostingsFormatExtension::getFormatName) + .orElse(defaultName); + return PostingsFormat.forName(codecName); + } + } + boolean useBloomFilter(String field) { if (mapperService == null) { return false; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java index 53ccccdbd4bab..bb229c795a83e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java @@ -8,7 +8,6 @@ */ package org.elasticsearch.index.mapper; -import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; @@ -344,10 +343,6 @@ public CompletionFieldType fieldType() { return (CompletionFieldType) super.fieldType(); } - static PostingsFormat postingsFormat() { - return PostingsFormat.forName("Completion912"); - } - @Override public boolean parsesArrayValue() { return true; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index 2f78e11761448..ce3f8cfb53184 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -9,7 +9,6 @@ package org.elasticsearch.index.mapper; -import org.apache.lucene.codecs.PostingsFormat; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.index.IndexSettings; @@ -21,7 +20,6 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -58,7 +56,6 @@ private CacheKey() {} private final Map indexAnalyzersMap; private final List indexTimeScriptMappers; private final Mapping mapping; - private final Set completionFields; private final int totalFieldsCount; /** @@ -161,7 +158,6 @@ private MappingLookup( this.nestedLookup = NestedLookup.build(nestedMappers); final Map indexAnalyzersMap = new HashMap<>(); - final Set completionFields = new HashSet<>(); final List indexTimeScriptMappers = new ArrayList<>(); for (FieldMapper mapper : mappers) { if (objects.containsKey(mapper.fullPath())) { @@ -174,9 +170,6 @@ private MappingLookup( if (mapper.hasScript()) { indexTimeScriptMappers.add(mapper); } - if (mapper instanceof CompletionFieldMapper) { - completionFields.add(mapper.fullPath()); - } } for (FieldAliasMapper aliasMapper : aliasMappers) { @@ -211,7 +204,6 @@ private MappingLookup( this.objectMappers = Map.copyOf(objects); this.runtimeFieldMappersCount = runtimeFields.size(); this.indexAnalyzersMap = Map.copyOf(indexAnalyzersMap); - this.completionFields = Set.copyOf(completionFields); this.indexTimeScriptMappers = List.copyOf(indexTimeScriptMappers); runtimeFields.stream().flatMap(RuntimeField::asMappedFieldTypes).map(MappedFieldType::name).forEach(this::validateDoesNotShadow); @@ -285,15 +277,6 @@ public Iterable fieldMappers() { return fieldMappers.values(); } - /** - * Gets the postings format for a particular field - * @param field the field to retrieve a postings format for - * @return the postings format for the field, or {@code null} if the default format should be used - */ - public PostingsFormat getPostingsFormat(String field) { - return completionFields.contains(field) ? CompletionFieldMapper.postingsFormat() : null; - } - void checkLimits(IndexSettings settings) { checkFieldLimit(settings.getMappingTotalFieldsLimit()); checkObjectDepthLimit(settings.getMappingDepthLimit()); diff --git a/server/src/main/java/org/elasticsearch/internal/CompletionsPostingsFormatExtension.java b/server/src/main/java/org/elasticsearch/internal/CompletionsPostingsFormatExtension.java new file mode 100644 index 0000000000000..bb28d4dd6c901 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/internal/CompletionsPostingsFormatExtension.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.internal; + +import org.apache.lucene.search.suggest.document.CompletionPostingsFormat; + +/** + * Allows plugging-in the Completions Postings Format. + */ +public interface CompletionsPostingsFormatExtension { + + /** + * Returns the name of the {@link CompletionPostingsFormat} that Elasticsearch should use. Should return null if the extension + * is not enabled. + *

+ * Note that the name must match a codec that is available on all nodes in the cluster, otherwise IndexCorruptionExceptions will occur. + * A feature can be used to protect against this scenario, or alternatively, the codec code can be rolled out prior to its usage by this + * extension. + */ + String getFormatName(); +}