From fb66d0f9eec625cfc6078ad903834d715ed121c8 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Sat, 26 Oct 2024 11:53:11 -0700 Subject: [PATCH] Add custom synonym_analyzer Signed-off-by: Prudhvi Godithi --- .../common/CommonAnalysisModulePlugin.java | 21 +++++++++++++++++-- .../SynonymGraphTokenFilterFactory.java | 5 +++-- .../common/SynonymTokenFilterFactory.java | 19 ++++++++++++++++- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java index f14e499081ce9..36a4233e6fcee 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java @@ -146,6 +146,7 @@ import org.opensearch.index.analysis.PreConfiguredTokenizer; import org.opensearch.index.analysis.TokenFilterFactory; import org.opensearch.index.analysis.TokenizerFactory; +import org.opensearch.indices.analysis.AnalysisModule; import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.opensearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy; import org.opensearch.plugins.AnalysisPlugin; @@ -157,6 +158,7 @@ import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -176,6 +178,9 @@ public class CommonAnalysisModulePlugin extends Plugin implements AnalysisPlugin private final SetOnce scriptService = new SetOnce<>(); + private AnalysisModule analysisModule; + + @Override public Collection createComponents( Client client, @@ -191,9 +196,16 @@ public Collection createComponents( Supplier repositoriesServiceSupplier ) { this.scriptService.set(scriptService); + try { + this.analysisModule = new AnalysisModule(environment, + List.of(this)); + } catch (IOException e) { + throw new RuntimeException(e); + } return Collections.emptyList(); } + @Override public List> getContexts() { return Collections.singletonList(AnalysisPredicateScript.CONTEXT); @@ -332,8 +344,13 @@ public Map> getTokenFilters() { filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new); filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new)); filters.put("stemmer", StemmerTokenFilterFactory::new); - filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new)); - filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new)); + filters.put("synonym", (indexSettings, environment, name, settings) -> + new SynonymTokenFilterFactory(indexSettings, environment, name, settings, analysisModule.getAnalysisRegistry()) + ); + filters.put("synonym_graph", (indexSettings, environment, name, settings) -> + new SynonymGraphTokenFilterFactory(indexSettings, environment, name, settings, analysisModule.getAnalysisRegistry()) + ); + filters.put("trim", TrimTokenFilterFactory::new); filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new)); filters.put("unique", UniqueTokenFilterFactory::new); diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java index fed959108c411..d1f1ebfd906f6 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java @@ -40,6 +40,7 @@ import org.opensearch.env.Environment; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.AnalysisMode; +import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.analysis.CharFilterFactory; import org.opensearch.index.analysis.TokenFilterFactory; import org.opensearch.index.analysis.TokenizerFactory; @@ -49,8 +50,8 @@ public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory { - SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, env, name, settings); + SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) { + super(indexSettings, env, name, settings, analysisRegistry); } @Override diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java index 01a65e87d7466..a1363c0c587a9 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java @@ -48,7 +48,9 @@ import org.opensearch.index.analysis.CustomAnalyzer; import org.opensearch.index.analysis.TokenFilterFactory; import org.opensearch.index.analysis.TokenizerFactory; +import org.opensearch.index.analysis.AnalysisRegistry; +import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.List; @@ -64,8 +66,10 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { protected final Settings settings; protected final Environment environment; protected final AnalysisMode analysisMode; + private final String synonymAnalyzer; + private final AnalysisRegistry analysisRegistry; - SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) { super(indexSettings, name, settings); this.settings = settings; @@ -83,6 +87,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { boolean updateable = settings.getAsBoolean("updateable", false); this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL; this.environment = env; + this.synonymAnalyzer = settings.get("synonym_analyzer", null); + this.analysisRegistry = analysisRegistry; } @Override @@ -137,6 +143,17 @@ Analyzer buildSynonymAnalyzer( List tokenFilters, Function allFilters ) { + if (synonymAnalyzer != null) { + Analyzer customSynonymAnalyzer; + try { + customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzer); + } catch (IOException e) { + throw new RuntimeException(e); + } + if (customSynonymAnalyzer != null) { + return customSynonymAnalyzer; + } + } return new CustomAnalyzer( tokenizer, charFilters.toArray(new CharFilterFactory[0]),