Skip to content

Commit

Permalink
synonym_analyzer configuration setting
Browse files Browse the repository at this point in the history
Signed-off-by: Prudhvi Godithi <[email protected]>
  • Loading branch information
prudhvigodithi committed Nov 6, 2024
1 parent 2b75b4d commit 7d6fc43
Show file tree
Hide file tree
Showing 8 changed files with 17 additions and 53 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add Setting to adjust the primary constraint weights ([#16471](https://github.com/opensearch-project/OpenSearch/pull/16471))
- Switch from `buildSrc/version.properties` to Gradle version catalog (`gradle/libs.versions.toml`) to enable dependabot to perform automated upgrades on common libs ([#16284](https://github.com/opensearch-project/OpenSearch/pull/16284))
- Add dynamic setting allowing size > 0 requests to be cached in the request cache ([#16483](https://github.com/opensearch-project/OpenSearch/pull/16483/files))
- Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)).

### Dependencies
- Bump `com.azure:azure-storage-common` from 12.25.1 to 12.27.1 ([#16521](https://github.com/opensearch-project/OpenSearch/pull/16521))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1216,18 +1216,14 @@ private void createConfiguration() {
);

final List<Path> configFiles;
try (Stream<Path> stream = Files.walk(getDistroDir().resolve("config"))) {
try (Stream<Path> stream = Files.list(getDistroDir().resolve("config"))) {
configFiles = stream.collect(Collectors.toList());
}
logToProcessStdout("Copying additional config files from distro " + configFiles);
for (Path file : configFiles) {
Path relativePath = getDistroDir().resolve("config").relativize(file);
Path dest = configFile.getParent().resolve(relativePath);
if (Files.isDirectory(file)) {
Files.createDirectories(dest);
} else {
Files.createDirectories(dest.getParent());
Files.copy(file, dest, StandardCopyOption.REPLACE_EXISTING);
Path dest = configFile.getParent().resolve(file.getFileName());
if (Files.exists(dest) == false) {
Files.copy(file, dest);
}
}
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAn
}

@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
filters.put("apostrophe", ApostropheFilterFactory::new);
filters.put("arabic_normalization", ArabicNormalizationFilterFactory::new);
Expand Down Expand Up @@ -339,12 +339,6 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
filters.put("uppercase", UpperCaseTokenFilterFactory::new);
filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
return filters;
}

@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
Map<String, AnalysisProvider<TokenFilterFactory>> filters = getTokenFilters();
filters.put(
"synonym",
requiresAnalysisSettings(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
protected final Settings settings;
protected final Environment environment;
protected final AnalysisMode analysisMode;
private final String synonymAnalyzer;
private final String synonymAnalyzerName;
private final AnalysisRegistry analysisRegistry;

SynonymTokenFilterFactory(
Expand All @@ -93,7 +93,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
boolean updateable = settings.getAsBoolean("updateable", false);
this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
this.environment = env;
this.synonymAnalyzer = settings.get("synonym_analyzer", null);
this.synonymAnalyzerName = settings.get("synonym_analyzer", null);
this.analysisRegistry = analysisRegistry;
}

Expand Down Expand Up @@ -149,10 +149,10 @@ Analyzer buildSynonymAnalyzer(
List<TokenFilterFactory> tokenFilters,
Function<String, TokenFilterFactory> allFilters
) {
if (synonymAnalyzer != null) {
if (synonymAnalyzerName != null) {
Analyzer customSynonymAnalyzer;
try {
customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzer);
customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzerName);
} catch (IOException e) {
throw new RuntimeException(e);

Check warning on line 157 in modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java

View check run for this annotation

Codecov / codecov/patch

modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java#L156-L157

Added lines #L156 - L157 were not covered by tests
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,7 @@ private void markedTestCase(String name, Map<String, Class<?>> map) {
* Tests the getTokenFilters(AnalysisModule) method to verify:
* 1. All token filters are properly loaded
* 2. Basic filters remain available
* 3. Synonym filters are added when AnalysisModule is provided
* 4. The total number of filters is correct (base filters + synonym filters)
* 3. Synonym filters remain available when AnalysisModule is provided
*/
public void testGetTokenFiltersWithAnalysisModule() {
CommonAnalysisModulePlugin plugin = (CommonAnalysisModulePlugin) getAnalysisPlugin();
Expand All @@ -324,24 +323,5 @@ public void testGetTokenFiltersWithAnalysisModule() {
assertTrue("Should contain basic filters", filters.containsKey("lowercase"));
assertTrue("Should contain synonym filter", filters.containsKey("synonym"));
assertTrue("Should contain synonym_graph filter", filters.containsKey("synonym_graph"));
Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> baseFilters = plugin.getTokenFilters();
assertEquals("Should contain additional synonym filters", baseFilters.size() + 2, filters.size());
}

/**
* Tests that synonym-related token filters are only available when an AnalysisModule is provided.
* This test verifies that:
* 1. Base getTokenFilters() does not include synonym filters
* 2. Extended getTokenFilters(AnalysisModule) includes synonym filters
* 3. Both synonym and synonym_graph filters require AnalysisModule
*/
public void testSynonymFiltersRequireAnalysisModule() {
CommonAnalysisModulePlugin plugin = (CommonAnalysisModulePlugin) getAnalysisPlugin();
Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> baseFilters = plugin.getTokenFilters();
Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> extendedFilters = plugin.getTokenFilters(analysisModule);
assertFalse("Base filters should not contain synonym filter", baseFilters.containsKey("synonym"));
assertTrue("Extended filters should contain synonym filter", extendedFilters.containsKey("synonym"));
assertFalse("Base filters should not contain synonym_graph filter", baseFilters.containsKey("synonym_graph"));
assertTrue("Extended filters should contain synonym_graph filter", extendedFilters.containsKey("synonym_graph"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -260,15 +260,13 @@ public void testTokenFiltersBypassSynonymAnalysis() throws IOException {
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
Environment environment = TestEnvironment.newEnvironment(settings);

// Initialize analysis registry
AnalysisModule module = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin()));
AnalysisRegistry analysisRegistry = module.getAnalysisRegistry();
AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin()));
AnalysisRegistry analysisRegistry = analysisModule.getAnalysisRegistry();
String[] bypassingFactories = new String[] { "dictionary_decompounder" };

CommonAnalysisModulePlugin plugin = new CommonAnalysisModulePlugin();
for (String factory : bypassingFactories) {
TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, environment, factory, settings);
TokenFilterFactory tff = plugin.getTokenFilters(analysisModule).get(factory).get(idxSettings, environment, factory, settings);
TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings);
SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry);
Analyzer analyzer = stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null);
Expand Down Expand Up @@ -330,8 +328,6 @@ public void testDisallowedTokenFilters() throws IOException {

Environment environment = TestEnvironment.newEnvironment(settings);
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);

// Create analysis module
AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisModulePlugin()));
AnalysisRegistry analysisRegistry = analysisModule.getAnalysisRegistry();
CommonAnalysisModulePlugin plugin = new CommonAnalysisModulePlugin();
Expand All @@ -347,7 +343,7 @@ public void testDisallowedTokenFilters() throws IOException {
"fingerprint" };

for (String factory : disallowedFactories) {
TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, environment, factory, settings);
TokenFilterFactory tff = plugin.getTokenFilters(analysisModule).get(factory).get(idxSettings, environment, factory, settings);
TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings);
SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ default Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {

/**
* Override to add additional {@link TokenFilter}s that need access to the AnalysisModule.
* The default implementation calls the existing getTokenFilters() method for backward compatibility.
* The default implementation for plugins that don't need AnalysisModule calls the existing getTokenFilters() method.
*/
default Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
return getTokenFilters();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -526,10 +526,7 @@ public boolean incrementToken() throws IOException {

/**
* Tests registration and functionality of token filters that require access to the AnalysisModule.
* This test verifies:
* 1. Token filter registration using the extended getTokenFilters(AnalysisModule) method
* 2. Filter functionality in both predefined and custom analyzers
* 3. Proper access to AnalysisModule reference within filter factory creation
* This test verifies the token filter registration using the extended getTokenFilters(AnalysisModule) method
*/
public void testTokenFilterRegistrationWithModuleReference() throws IOException {
class TestPlugin implements AnalysisPlugin {
Expand Down

0 comments on commit 7d6fc43

Please sign in to comment.