From 6f49773cae9110ac34e559ff5add87dd0fba7117 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Tue, 19 Nov 2024 14:56:46 +0000 Subject: [PATCH] [ML] Support any tokenizers for text_expansion task (#116935) (#117018) Backport of #116935 --- .../inference/trainedmodel/TextExpansionConfig.java | 6 ------ .../trainedmodel/TextExpansionConfigTests.java | 11 ----------- 2 files changed, 17 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/TextExpansionConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/TextExpansionConfig.java index f4ac89124cddb..68e0f7e1ac885 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/TextExpansionConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/TextExpansionConfig.java @@ -76,12 +76,6 @@ public TextExpansionConfig( this.vocabularyConfig = Optional.ofNullable(vocabularyConfig) .orElse(new VocabularyConfig(InferenceIndexConstants.nativeDefinitionStore())); this.tokenization = tokenization == null ? Tokenization.createDefault() : tokenization; - if (this.tokenization instanceof BertTokenization == false) { - throw ExceptionsHelper.badRequestException( - "text expansion models must be configured with BERT tokenizer, [{}] given", - this.tokenization.getName() - ); - } this.resultsField = resultsField; } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/TextExpansionConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/TextExpansionConfigTests.java index cf4630899ab53..a91cceec8a167 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/TextExpansionConfigTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/TextExpansionConfigTests.java @@ -7,10 +7,8 @@ package org.elasticsearch.xpack.core.ml.inference.trainedmodel; -import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.TransportVersion; import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xpack.core.ml.inference.InferenceConfigItemTestCase; @@ -67,13 +65,4 @@ protected TextExpansionConfig doParseInstance(XContentParser parser) throws IOEx protected TextExpansionConfig mutateInstanceForVersion(TextExpansionConfig instance, TransportVersion version) { return instance; } - - public void testBertTokenizationOnly() { - ElasticsearchStatusException e = expectThrows( - ElasticsearchStatusException.class, - () -> new TextExpansionConfig(null, RobertaTokenizationTests.createRandom(), null) - ); - assertEquals(RestStatus.BAD_REQUEST, e.status()); - assertEquals("text expansion models must be configured with BERT tokenizer, [roberta] given", e.getMessage()); - } }