Commit: iter

jimczi committed Nov 30, 2024
1 parent 1d1e819 commit 4f29bd8

Showing 20 changed files with 71 additions and 105 deletions.
@@ -349,21 +349,6 @@ public final DocumentParserContext addIgnoredFieldFromContext(IgnoredSourceField
return this;
}

/**
 * Called by {@link InferenceMetadataFieldsMapper} to indicate that the metadata field is present
* in _source.
*/
public void markInferenceMetadataField() {
this.hasInferenceMetadata = true;
}

/**
* Returns whether the _source contains an inference metadata field.
*/
public final boolean hasInferenceMetadataField() {
return hasInferenceMetadata;
}

/**
* Wraps {@link XContentDataHelper#encodeToken}, disabling dot expansion from {@link DotExpandingXContentParser}.
* This helps avoid producing duplicate names in the same scope, due to expanding dots to objects.
@@ -679,6 +664,21 @@ public boolean isWithinInferenceMetadata() {
return false;
}

/**
 * Called by {@link InferenceMetadataFieldsMapper} to indicate that the metadata field is present
* in _source.
*/
public void markInferenceMetadataField() {
this.hasInferenceMetadata = true;
}

/**
* Returns whether the _source contains an inference metadata field.
*/
public final boolean hasInferenceMetadataField() {
return hasInferenceMetadata;
}

boolean inArrayScope() {
return currentScope == Scope.ARRAY;
}
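For illustration, a minimal sketch of the call pattern these two relocated accessors support; only the two methods shown above are from this commit, the surrounding method is hypothetical.

    // Hypothetical usage: InferenceMetadataFieldsMapper marks the flag while
    // parsing _source, and any later parsing step can branch on the accessor.
    static boolean exampleUsage(DocumentParserContext context) {
        context.markInferenceMetadataField();       // producer: the metadata field mapper
        return context.hasInferenceMetadataField(); // consumer: true once marked
    }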
@@ -235,7 +235,7 @@ protected static String convertFieldValue(MappedFieldType type, Object value) {
}
}

public static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
protected static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
    // the postings highlighter accepts all values in a single string; since offsets need to match the content
    // loaded from stored fields, we merge all values using a proper separator
String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(valuesSeparator));
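A hedged illustration of what mergeFieldValues produces; the values and separator below are made up, while Strings.collectionToDelimitedString is the call shown above.

    List<Object> fieldValues = List.of("first value", "second value");
    char valuesSeparator = '\u0000';
    // yields "first value\u0000second value": offsets computed over the merged
    // string line up with the content loaded from stored fields
    String merged = Strings.collectionToDelimitedString(fieldValues, String.valueOf(valuesSeparator));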
@@ -1166,10 +1166,8 @@ public void testSupportsParsingObject() throws IOException {
Object sampleValueForDocument = getSampleObjectForDocument();
assertThat(sampleValueForDocument, instanceOf(Map.class));
SourceToParse source = source(builder -> {
builder.startObject(InferenceMetadataFieldsMapper.NAME);
builder.field("field");
builder.value(sampleValueForDocument);
builder.endObject();
});
ParsedDocument doc = mapper.parse(source);
assertNotNull(doc);
2 changes: 1 addition & 1 deletion x-pack/plugin/core/build.gradle
@@ -52,6 +52,7 @@ dependencies {
api "commons-codec:commons-codec:${versions.commonscodec}"
testImplementation project(path: ':modules:aggregations')
testImplementation project(path: ':modules:data-streams')
testImplementation project(':modules:mapper-extras')

// security deps
api 'com.unboundid:unboundid-ldapsdk:6.0.3'
@@ -68,7 +69,6 @@ dependencies {
testImplementation project(path: ':modules:analysis-common')
testImplementation project(path: ':modules:rest-root')
testImplementation project(path: ':modules:health-shards-availability')
testImplementation project(path: ':modules:mapper-extras')
// Needed for Fips140ProviderVerificationTests
testCompileOnly('org.bouncycastle:bc-fips:1.0.2.5')

@@ -11,6 +11,7 @@
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.license.LicensesMetadata;
import org.elasticsearch.persistent.PersistentTaskParams;
import org.elasticsearch.persistent.PersistentTaskState;
@@ -70,6 +71,9 @@
import org.elasticsearch.xpack.core.ml.job.config.JobTaskState;
import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskParams;
import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskState;
import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder;
import org.elasticsearch.xpack.core.ml.search.TextExpansionQueryBuilder;
import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder;
import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage;
import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage;
import org.elasticsearch.xpack.core.rollup.RollupField;
@@ -390,4 +394,23 @@ public List<NamedXContentRegistry.Entry> getNamedXContent() {
)
);
}

@Override
public List<SearchPlugin.QuerySpec<?>> getQueries() {
return List.of(
new QuerySpec<>(SparseVectorQueryBuilder.NAME, SparseVectorQueryBuilder::new, SparseVectorQueryBuilder::fromXContent),
new QuerySpec<QueryBuilder>(
TextExpansionQueryBuilder.NAME,
TextExpansionQueryBuilder::new,
TextExpansionQueryBuilder::fromXContent
),
// TODO: The WeightedTokensBuilder is slated for removal after the SparseVectorQueryBuilder is available.
// The logic to create a Boolean query based on weighted tokens will remain and/or be moved to server.
new SearchPlugin.QuerySpec<QueryBuilder>(
WeightedTokensQueryBuilder.NAME,
WeightedTokensQueryBuilder::new,
WeightedTokensQueryBuilder::fromXContent
)
);
}
}
@@ -17,11 +17,11 @@
import java.io.IOException;
import java.util.Objects;

public class SparseVectorQuery extends Query {
public class SparseVectorQueryWrapper extends Query {
private final String fieldName;
private final Query termsQuery;

public SparseVectorQuery(String fieldName, Query termsQuery) {
public SparseVectorQueryWrapper(String fieldName, Query termsQuery) {
this.fieldName = fieldName;
this.termsQuery = termsQuery;
}
@@ -34,7 +34,7 @@ public Query getTermsQuery() {
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
var rewrite = termsQuery.rewrite(indexSearcher);
if (rewrite != termsQuery) {
return new SparseVectorQuery(fieldName, rewrite);
return new SparseVectorQueryWrapper(fieldName, rewrite);
}
return this;
}
@@ -61,7 +61,7 @@ public boolean equals(Object obj) {
if (sameClassAs(obj) == false) {
return false;
}
SparseVectorQuery that = (SparseVectorQuery) obj;
SparseVectorQueryWrapper that = (SparseVectorQueryWrapper) obj;
return fieldName.equals(that.fieldName) && termsQuery.equals(that.termsQuery);
}

@@ -35,7 +35,7 @@ public static Query queryBuilderWithAllTokens(
for (var token : tokens) {
qb.add(new BoostQuery(ft.termQuery(token.token(), context), token.weight()), BooleanClause.Occur.SHOULD);
}
return new SparseVectorQuery(fieldName, qb.setMinimumNumberShouldMatch(1).build());
return new SparseVectorQueryWrapper(fieldName, qb.setMinimumNumberShouldMatch(1).build());
}

public static Query queryBuilderWithPrunedTokens(
@@ -69,7 +69,7 @@ public static Query queryBuilderWithPrunedTokens(
}
}

return new SparseVectorQuery(fieldName, qb.setMinimumNumberShouldMatch(1).build());
return new SparseVectorQueryWrapper(fieldName, qb.setMinimumNumberShouldMatch(1).build());
}

/**
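A minimal sketch of the construction idiom shared by both builders above, with hypothetical tokens and a plain Lucene TermQuery standing in for ft.termQuery.

    BooleanQuery.Builder qb = new BooleanQuery.Builder();
    // each weighted token becomes a SHOULD clause boosted by its weight
    qb.add(new BoostQuery(new TermQuery(new Term("ml.tokens", "foo")), 0.8f), BooleanClause.Occur.SHOULD);
    qb.add(new BoostQuery(new TermQuery(new Term("ml.tokens", "bar")), 0.3f), BooleanClause.Occur.SHOULD);
    // at least one clause must match; the wrapper keeps the field name visible to query visitors
    Query query = new SparseVectorQueryWrapper("ml.tokens", qb.setMinimumNumberShouldMatch(1).build());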
@@ -234,7 +234,7 @@ private void testDoToQuery(SparseVectorQueryBuilder queryBuilder, SearchExecutio
    // It's possible that all documents were pruned under aggressive pruning configurations
assertTrue(query instanceof BooleanQuery || query instanceof MatchNoDocsQuery);
} else {
assertTrue(query instanceof SparseVectorQuery);
assertTrue(query instanceof SparseVectorQueryWrapper);
}
}

@@ -271,8 +271,8 @@ public void testPruningIsAppliedCorrectly() throws IOException {
}

private void assertCorrectLuceneQuery(String name, Query query, List<String> expectedFeatureFields) {
assertTrue(query instanceof SparseVectorQuery);
Query termsQuery = ((SparseVectorQuery) query).getTermsQuery();
assertTrue(query instanceof SparseVectorQueryWrapper);
Query termsQuery = ((SparseVectorQueryWrapper) query).getTermsQuery();
assertTrue(termsQuery instanceof BooleanQuery);
List<BooleanClause> booleanClauses = ((BooleanQuery) termsQuery).clauses();
assertEquals(
@@ -345,8 +345,8 @@ public void testMustRewrite() throws IOException {

@Override
protected void doAssertLuceneQuery(WeightedTokensQueryBuilder queryBuilder, Query query, SearchExecutionContext context) {
assertThat(query, instanceOf(SparseVectorQuery.class));
Query termsQuery = ((SparseVectorQuery) query).getTermsQuery();
assertThat(query, instanceOf(SparseVectorQueryWrapper.class));
Query termsQuery = ((SparseVectorQueryWrapper) query).getTermsQuery();
assertThat(termsQuery, instanceOf(BooleanQuery.class));
BooleanQuery booleanQuery = (BooleanQuery) termsQuery;
assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1);
1 change: 0 additions & 1 deletion x-pack/plugin/inference/src/main/java/module-info.java
@@ -34,7 +34,6 @@
requires software.amazon.awssdk.retries.api;
requires org.reactivestreams;
requires org.elasticsearch.logging;
requires org.apache.lucene.highlighter;

exports org.elasticsearch.xpack.inference.action;
exports org.elasticsearch.xpack.inference.registry;
@@ -77,9 +77,6 @@
import org.elasticsearch.xpack.inference.mapper.OffsetSourceMetaFieldMapper;
import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper;
import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder;
import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder;
import org.elasticsearch.xpack.core.ml.search.TextExpansionQueryBuilder;
import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder;
import org.elasticsearch.xpack.inference.rank.random.RandomRankBuilder;
import org.elasticsearch.xpack.inference.rank.random.RandomRankRetrieverBuilder;
import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankBuilder;
@@ -426,22 +423,7 @@ public Collection<MappedActionFilter> getMappedActionFilters() {
}

public List<QuerySpec<?>> getQueries() {
return List.of(
new QuerySpec<>(SemanticQueryBuilder.NAME, SemanticQueryBuilder::new, SemanticQueryBuilder::fromXContent),
new QuerySpec<>(SparseVectorQueryBuilder.NAME, SparseVectorQueryBuilder::new, SparseVectorQueryBuilder::fromXContent),
new QuerySpec<QueryBuilder>(
TextExpansionQueryBuilder.NAME,
TextExpansionQueryBuilder::new,
TextExpansionQueryBuilder::fromXContent
),
// TODO: The WeightedTokensBuilder is slated for removal after the SparseVectorQueryBuilder is available.
// The logic to create a Boolean query based on weighted tokens will remain and/or be moved to server.
new QuerySpec<QueryBuilder>(
WeightedTokensQueryBuilder.NAME,
WeightedTokensQueryBuilder::new,
WeightedTokensQueryBuilder::fromXContent
)
);
return List.of(new QuerySpec<>(SemanticQueryBuilder.NAME, SemanticQueryBuilder::new, SemanticQueryBuilder::fromXContent));
}

@Override
@@ -12,9 +12,5 @@
import java.util.List;

public interface Chunker {
record Chunk(int startOffset, int endOffset) {}

List<String> chunk(String input, ChunkingSettings chunkingSettings);

List<Chunk> chunkOffset(String input, ChunkingSettings chunkingSettings);
}
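With this change the interface is down to a single method; a trivial, hypothetical implementation for illustration.

    // Pass-through chunker: returns the whole input as one chunk, ignoring the settings.
    public class PassthroughChunker implements Chunker {
        @Override
        public List<String> chunk(String input, ChunkingSettings chunkingSettings) {
            return List.of(input);
        }
    }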
@@ -15,7 +15,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

/**
* Split text into chunks aligned on sentence boundaries.
@@ -59,20 +58,6 @@ public List<String> chunk(String input, ChunkingSettings chunkingSettings) {
}
}

@Override
public List<Chunk> chunkOffset(String input, ChunkingSettings chunkingSettings) {
if (chunkingSettings instanceof SentenceBoundaryChunkingSettings sentenceBoundaryChunkingSettings) {
return chunkOffset(input, sentenceBoundaryChunkingSettings.maxChunkSize, sentenceBoundaryChunkingSettings.sentenceOverlap > 0);
} else {
throw new IllegalArgumentException(
Strings.format(
"SentenceBoundaryChunker can't use ChunkingSettings with strategy [%s]",
chunkingSettings.getChunkingStrategy()
)
);
}
}

/**
* Break the input text into small chunks on sentence boundaries.
*
@@ -81,19 +66,7 @@ public List<Chunk> chunkOffset(String input, ChunkingSettings chunkingSettings)
* @return The input text chunked
*/
public List<String> chunk(String input, int maxNumberWordsPerChunk, boolean includePrecedingSentence) {
var chunks = chunkOffset(input, maxNumberWordsPerChunk, includePrecedingSentence);
return chunks.stream().map(c -> input.substring(c.startOffset(), c.endOffset())).collect(Collectors.toList());
}

/**
* Break the input text into small chunks on sentence boundaries.
*
* @param input Text to chunk
* @param maxNumberWordsPerChunk Maximum size of the chunk
* @return The input text chunked
*/
public List<Chunk> chunkOffset(String input, int maxNumberWordsPerChunk, boolean includePrecedingSentence) {
var chunks = new ArrayList<Chunk>();
var chunks = new ArrayList<String>();

sentenceIterator.setText(input);
wordIterator.setText(input);
@@ -118,7 +91,7 @@ public List<Chunk> chunkOffset(String input, int maxNumberWordsPerChunk, boolean
int nextChunkWordCount = wordsInSentenceCount;
if (chunkWordCount > 0) {
// add a new chunk containing all the input up to this sentence
chunks.add(new Chunk(chunkStart, chunkEnd));
chunks.add(input.substring(chunkStart, chunkEnd));

if (includePrecedingSentence) {
if (wordsInPrecedingSentenceCount + wordsInSentenceCount > maxNumberWordsPerChunk) {
@@ -154,7 +127,7 @@ public List<Chunk> chunkOffset(String input, int maxNumberWordsPerChunk, boolean
for (; i < sentenceSplits.size() - 1; i++) {
// Because the substring was passed to splitLongSentence()
// the returned positions need to be offset by chunkStart
chunks.add(new Chunk(chunkStart + sentenceSplits.get(i).start(), chunkStart + sentenceSplits.get(i).end()));
chunks.add(input.substring(chunkStart + sentenceSplits.get(i).start(), chunkStart + sentenceSplits.get(i).end()));
}
// The final split is partially filled.
// Set the next chunk start to the beginning of the
@@ -178,7 +151,7 @@ public List<Chunk> chunkOffset(String input, int maxNumberWordsPerChunk, boolean
}

if (chunkWordCount > 0) {
chunks.add(new Chunk(chunkStart, input.length()));
chunks.add(input.substring(chunkStart));
}

return chunks;
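A usage sketch for the reverted String-based API, assuming the chunker's no-argument constructor; the text and word limit are made up.

    SentenceBoundaryChunker chunker = new SentenceBoundaryChunker();
    // at most 25 words per chunk, no preceding-sentence overlap
    List<String> chunks = chunker.chunk("First sentence. Second sentence. Third sentence.", 25, false);
    // each element is a substring of the input, aligned on sentence boundaries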
@@ -55,11 +55,6 @@ public List<String> chunk(String input, ChunkingSettings chunkingSettings) {
}
}

@Override
public List<Chunk> chunkOffset(String input, ChunkingSettings chunkingSettings) {
return List.of();
}

/**
* Break the input text into small chunks as dictated
* by the chunking parameters
@@ -35,7 +35,7 @@
import org.elasticsearch.xpack.inference.mapper.OffsetSourceMetaFieldMapper;
import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper;
import org.elasticsearch.xpack.inference.mapper.SemanticTextUtils;
import org.elasticsearch.xpack.core.ml.search.SparseVectorQuery;
import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryWrapper;

import java.io.IOException;
import java.util.ArrayList;
@@ -217,7 +217,7 @@ public void consumeTerms(Query query, Term... terms) {

@Override
public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
if (parent instanceof SparseVectorQuery sparseVectorQuery) {
if (parent instanceof SparseVectorQueryWrapper sparseVectorQuery) {
queries.add(sparseVectorQuery.getTermsQuery());
}
return this;
@@ -15,7 +15,7 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;

public final class OffsetField extends Field {
public final class OffsetSourceField extends Field {

private static final FieldType FIELD_TYPE = new FieldType();

@@ -28,7 +28,7 @@ public final class OffsetField extends Field {
private int startOffset;
private int endOffset;

public OffsetField(String fieldName, String sourceFieldName, int startOffset, int endOffset) {
public OffsetSourceField(String fieldName, String sourceFieldName, int startOffset, int endOffset) {
super(fieldName, sourceFieldName, FIELD_TYPE);
this.startOffset = startOffset;
this.endOffset = endOffset;
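A hedged construction example for the renamed class; the field names below are illustrative, only the constructor signature is taken from the code above.

    // records that the offsets entry points at characters [12, 57) of the field "body"
    OffsetSourceField offsets = new OffsetSourceField("_offset_source", "body", 12, 57);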
@@ -151,7 +151,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
    throw new IllegalArgumentException("Unknown field name [" + fieldName + "]");
}
}
context.doc().addWithKey(fullPath(), new OffsetField(NAME, fullPath() + "." + sourceFieldName, startOffset, endOffset));
context.doc().addWithKey(fullPath(), new OffsetSourceField(NAME, fullPath() + "." + sourceFieldName, startOffset, endOffset));
} finally {
context.path().setWithinLeafObject(isWithinLeafObject);
}