diff --git a/CHANGELOG.md b/CHANGELOG.md index edbf7c8ed065c..c875f440ad7f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add Setting to adjust the primary constraint weights ([#16471](https://github.com/opensearch-project/OpenSearch/pull/16471)) - Switch from `buildSrc/version.properties` to Gradle version catalog (`gradle/libs.versions.toml`) to enable dependabot to perform automated upgrades on common libs ([#16284](https://github.com/opensearch-project/OpenSearch/pull/16284)) - Add dynamic setting allowing size > 0 requests to be cached in the request cache ([#16483](https://github.com/opensearch-project/OpenSearch/pull/16483/files)) +- Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)). ### Dependencies - Bump `com.azure:azure-storage-common` from 12.25.1 to 12.27.1 ([#16521](https://github.com/opensearch-project/OpenSearch/pull/16521)) diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java index bb409c2afd871..cd22560af9a96 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java @@ -1216,18 +1216,14 @@ private void createConfiguration() { ); final List configFiles; - try (Stream stream = Files.walk(getDistroDir().resolve("config"))) { + try (Stream stream = Files.list(getDistroDir().resolve("config"))) { configFiles = stream.collect(Collectors.toList()); } logToProcessStdout("Copying additional config files from distro " + configFiles); for (Path file : configFiles) { - Path relativePath = getDistroDir().resolve("config").relativize(file); - Path dest = configFile.getParent().resolve(relativePath); - if (Files.isDirectory(file)) { - Files.createDirectories(dest); - } else { - Files.createDirectories(dest.getParent()); - Files.copy(file, dest, StandardCopyOption.REPLACE_EXISTING); + Path dest = configFile.getParent().resolve(file.getFileName()); + if (Files.exists(dest) == false) { + Files.copy(file, dest); } } } catch (IOException e) { diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java index 763c1783c2d28..7f9437972a358 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java @@ -248,7 +248,7 @@ public Map>> getAn } @Override - public Map> getTokenFilters() { + public Map> getTokenFilters(AnalysisModule analysisModule) { Map> filters = new TreeMap<>(); filters.put("apostrophe", ApostropheFilterFactory::new); filters.put("arabic_normalization", ArabicNormalizationFilterFactory::new); @@ -339,12 +339,6 @@ public Map> getTokenFilters() { filters.put("uppercase", UpperCaseTokenFilterFactory::new); filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new); filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new); - return filters; - } - - @Override - public Map> getTokenFilters(AnalysisModule analysisModule) { - Map> filters = getTokenFilters(); filters.put( "synonym", requiresAnalysisSettings( diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java index 86417a579628c..1cd78170e66c8 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java @@ -66,7 +66,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { protected final Settings settings; protected final Environment environment; protected final AnalysisMode analysisMode; - private final String synonymAnalyzer; + private final String synonymAnalyzerName; private final AnalysisRegistry analysisRegistry; SynonymTokenFilterFactory( @@ -93,7 +93,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { boolean updateable = settings.getAsBoolean("updateable", false); this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL; this.environment = env; - this.synonymAnalyzer = settings.get("synonym_analyzer", null); + this.synonymAnalyzerName = settings.get("synonym_analyzer", null); this.analysisRegistry = analysisRegistry; } @@ -149,10 +149,10 @@ Analyzer buildSynonymAnalyzer( List tokenFilters, Function allFilters ) { - if (synonymAnalyzer != null) { + if (synonymAnalyzerName != null) { Analyzer customSynonymAnalyzer; try { - customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzer); + customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzerName); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java index baad489d33649..1f4faf53dced5 100644 --- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java @@ -314,8 +314,7 @@ private void markedTestCase(String name, Map> map) { * Tests the getTokenFilters(AnalysisModule) method to verify: * 1. All token filters are properly loaded * 2. Basic filters remain available - * 3. Synonym filters are added when AnalysisModule is provided - * 4. The total number of filters is correct (base filters + synonym filters) + * 3. Synonym filters remain available when AnalysisModule is provided */ public void testGetTokenFiltersWithAnalysisModule() { CommonAnalysisModulePlugin plugin = (CommonAnalysisModulePlugin) getAnalysisPlugin(); @@ -324,24 +323,5 @@ public void testGetTokenFiltersWithAnalysisModule() { assertTrue("Should contain basic filters", filters.containsKey("lowercase")); assertTrue("Should contain synonym filter", filters.containsKey("synonym")); assertTrue("Should contain synonym_graph filter", filters.containsKey("synonym_graph")); - Map> baseFilters = plugin.getTokenFilters(); - assertEquals("Should contain additional synonym filters", baseFilters.size() + 2, filters.size()); - } - - /** - * Tests that synonym-related token filters are only available when an AnalysisModule is provided. - * This test verifies that: - * 1. Base getTokenFilters() does not include synonym filters - * 2. Extended getTokenFilters(AnalysisModule) includes synonym filters - * 3. Both synonym and synonym_graph filters require AnalysisModule - */ - public void testSynonymFiltersRequireAnalysisModule() { - CommonAnalysisModulePlugin plugin = (CommonAnalysisModulePlugin) getAnalysisPlugin(); - Map> baseFilters = plugin.getTokenFilters(); - Map> extendedFilters = plugin.getTokenFilters(analysisModule); - assertFalse("Base filters should not contain synonym filter", baseFilters.containsKey("synonym")); - assertTrue("Extended filters should contain synonym filter", extendedFilters.containsKey("synonym")); - assertFalse("Base filters should not contain synonym_graph filter", baseFilters.containsKey("synonym_graph")); - assertTrue("Extended filters should contain synonym_graph filter", extendedFilters.containsKey("synonym_graph")); } } diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java index e7b378701c166..33d92e01a85b1 100644 --- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java +++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java @@ -260,15 +260,13 @@ public void testTokenFiltersBypassSynonymAnalysis() throws IOException { .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); Environment environment = TestEnvironment.newEnvironment(settings); - - // Initialize analysis registry - AnalysisModule module = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin())); - AnalysisRegistry analysisRegistry = module.getAnalysisRegistry(); + AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin())); + AnalysisRegistry analysisRegistry = analysisModule.getAnalysisRegistry(); String[] bypassingFactories = new String[] { "dictionary_decompounder" }; CommonAnalysisModulePlugin plugin = new CommonAnalysisModulePlugin(); for (String factory : bypassingFactories) { - TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, environment, factory, settings); + TokenFilterFactory tff = plugin.getTokenFilters(analysisModule).get(factory).get(idxSettings, environment, factory, settings); TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings); SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry); Analyzer analyzer = stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null); @@ -330,8 +328,6 @@ public void testDisallowedTokenFilters() throws IOException { Environment environment = TestEnvironment.newEnvironment(settings); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); - - // Create analysis module AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin())); AnalysisRegistry analysisRegistry = analysisModule.getAnalysisRegistry(); CommonAnalysisModulePlugin plugin = new CommonAnalysisModulePlugin(); @@ -347,7 +343,7 @@ public void testDisallowedTokenFilters() throws IOException { "fingerprint" }; for (String factory : disallowedFactories) { - TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, environment, factory, settings); + TokenFilterFactory tff = plugin.getTokenFilters(analysisModule).get(factory).get(idxSettings, environment, factory, settings); TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings); SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry); diff --git a/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java b/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java index a7c4604a30553..58e43633777c9 100644 --- a/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java +++ b/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java @@ -87,7 +87,7 @@ default Map> getCharFilters() { /** * Override to add additional {@link TokenFilter}s that need access to the AnalysisModule. - * The default implementation calls the existing getTokenFilters() method for backward compatibility. + * The default implementation for plugins that don't need AnalysisModule calls the existing getTokenFilters() method. */ default Map> getTokenFilters(AnalysisModule analysisModule) { return getTokenFilters(); diff --git a/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java b/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java index 625f79b26b01b..74bc987c44b15 100644 --- a/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java +++ b/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java @@ -526,10 +526,7 @@ public boolean incrementToken() throws IOException { /** * Tests registration and functionality of token filters that require access to the AnalysisModule. - * This test verifies: - * 1. Token filter registration using the extended getTokenFilters(AnalysisModule) method - * 2. Filter functionality in both predefined and custom analyzers - * 3. Proper access to AnalysisModule reference within filter factory creation + * This test verifies the token filter registration using the extended getTokenFilters(AnalysisModule) method */ public void testTokenFilterRegistrationWithModuleReference() throws IOException { class TestPlugin implements AnalysisPlugin {