Skip to content

Commit

Permalink
[ML] Support any tokenizers for text_expansion task (elastic#116935)
Browse files Browse the repository at this point in the history
* Support any tokenizers for text_expansion task

* Remove unused imports

---------

Co-authored-by: Elastic Machine <[email protected]>
  • Loading branch information
2 people authored and davidkyle committed Nov 19, 2024
1 parent 23f9726 commit 9f4baa4
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,6 @@ public TextExpansionConfig(
this.vocabularyConfig = Optional.ofNullable(vocabularyConfig)
.orElse(new VocabularyConfig(InferenceIndexConstants.nativeDefinitionStore()));
this.tokenization = tokenization == null ? Tokenization.createDefault() : tokenization;
if (this.tokenization instanceof BertTokenization == false) {
throw ExceptionsHelper.badRequestException(
"text expansion models must be configured with BERT tokenizer, [{}] given",
this.tokenization.getName()
);
}
this.resultsField = resultsField;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@

package org.elasticsearch.xpack.core.ml.inference.trainedmodel;

import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xpack.core.ml.inference.InferenceConfigItemTestCase;

Expand Down Expand Up @@ -67,13 +65,4 @@ protected TextExpansionConfig doParseInstance(XContentParser parser) throws IOEx
/**
 * Returns the given instance unchanged; the {@code version} argument is not consulted here.
 *
 * @param instance the config instance under test
 * @param version  the transport version being targeted (unused by this implementation)
 * @return the same {@code instance} that was passed in
 */
protected TextExpansionConfig mutateInstanceForVersion(TextExpansionConfig instance, TransportVersion version) {
return instance;
}

/**
 * Checks that building a {@link TextExpansionConfig} with the tokenization produced by
 * {@code RobertaTokenizationTests.createRandom()} fails with an
 * {@link ElasticsearchStatusException} whose status is {@code BAD_REQUEST} and whose
 * message names the rejected tokenizer.
 */
public void testBertTokenizationOnly() {
    ElasticsearchStatusException badRequest = expectThrows(ElasticsearchStatusException.class, () -> {
        // Construction itself is expected to throw; the created object is never used.
        new TextExpansionConfig(null, RobertaTokenizationTests.createRandom(), null);
    });

    // Status first, then the exact message, so a failure reports the coarser mismatch first.
    assertEquals(RestStatus.BAD_REQUEST, badRequest.status());
    assertEquals("text expansion models must be configured with BERT tokenizer, [roberta] given", badRequest.getMessage());
}
}

0 comments on commit 9f4baa4

Please sign in to comment.