shiv0408 · shiv0408 · Sep 2, 2024 · Aug 20, 2024 · Aug 21, 2024 · Aug 21, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Fix for hasInitiatedFetching to fix allocation explain and manual reroute APIs (([#14972](https://github.com/opensearch-project/OpenSearch/pull/14972))
 - [Workload Management] Add queryGroupId to Task ([14708](https://github.com/opensearch-project/OpenSearch/pull/14708))
 - Add setting to ignore throttling nodes for allocation of unassigned primaries in remote restore ([#14991](https://github.com/opensearch-project/OpenSearch/pull/14991))
+- [Workload Management] Add Delete QueryGroup API Logic ([#14735](https://github.com/opensearch-project/OpenSearch/pull/14735))
 - [Streaming Indexing] Enhance RestClient with a new streaming API support ([#14437](https://github.com/opensearch-project/OpenSearch/pull/14437))
 - Add basic aggregation support for derived fields ([#14618](https://github.com/opensearch-project/OpenSearch/pull/14618))
 - [Workload Management] Add Create QueryGroup API Logic ([#14680](https://github.com/opensearch-project/OpenSearch/pull/14680))
@@ -18,9 +19,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711))
 - Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054))
 - [Workload Management] Add Get QueryGroup API Logic ([14709](https://github.com/opensearch-project/OpenSearch/pull/14709))
+- [Workload Management] Add Settings for Workload Management feature ([#15028](https://github.com/opensearch-project/OpenSearch/pull/15028))
 - [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897))
 - Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774))
 - Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153))
+- Add allowlist setting for ingest-geoip and ingest-useragent ([#15325](https://github.com/opensearch-project/OpenSearch/pull/15325))
+- Adding access to noSubMatches and noOverlappingMatches in Hyphenation ([#13895](https://github.com/opensearch-project/OpenSearch/pull/13895))
+- Add support for index level max slice count setting for concurrent segment search ([#15336](https://github.com/opensearch-project/OpenSearch/pull/15336))
 
 ### Dependencies
 - Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081))
@@ -44,6 +49,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ### Changed
 - Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979))
+- Optimize regexp-based include/exclude on aggregations when pattern matches prefixes ([#14371](https://github.com/opensearch-project/OpenSearch/pull/14371))
 - Replace and block usages of org.apache.logging.log4j.util.Strings ([#15238](https://github.com/opensearch-project/OpenSearch/pull/15238))
 
 ### Deprecated

diff --git a/MAINTAINERS.md b/MAINTAINERS.md
@@ -22,7 +22,7 @@ This document contains a list of maintainers in this repo. See [opensearch-proje
 | Varun Bansal             | [linuxpi](https://github.com/linuxpi)                   | Amazon      |
 | Marc Handalian           | [mch2](https://github.com/mch2)                         | Amazon      |
 | Michael Froh             | [msfroh](https://github.com/msfroh)                     | Amazon      |
-| Nick Knize               | [nknize](https://github.com/nknize)                     | Amazon      |
+| Nick Knize               | [nknize](https://github.com/nknize)                     | Lucenia     |
 | Owais Kazi               | [owaiskazi19](https://github.com/owaiskazi19)           | Amazon      |
 | Peter Nied               | [peternied](https://github.com/peternied)               | Amazon      |
 | Rishikesh Pasham         | [Rishikesh1159](https://github.com/Rishikesh1159)       | Amazon      |

diff --git a/...c/main/java/org/opensearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java b/...c/main/java/org/opensearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java
@@ -54,11 +54,16 @@
  */
 public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundWordTokenFilterFactory {
 
+    private final boolean noSubMatches;
+    private final boolean noOverlappingMatches;
     private final HyphenationTree hyphenationTree;
 
     HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, env, name, settings);
 
+        noSubMatches = settings.getAsBoolean("no_sub_matches", false);
+        noOverlappingMatches = settings.getAsBoolean("no_overlapping_matches", false);
+
         String hyphenationPatternsPath = settings.get("hyphenation_patterns_path", null);
         if (hyphenationPatternsPath == null) {
             throw new IllegalArgumentException("hyphenation_patterns_path is a required setting.");
@@ -85,7 +90,9 @@ public TokenStream create(TokenStream tokenStream) {
             minWordSize,
             minSubwordSize,
             maxSubwordSize,
-            onlyLongestMatch
+            onlyLongestMatch,
+            noSubMatches,
+            noOverlappingMatches
         );
     }
 }
diff --git a/...s/analysis-common/src/test/java/org/opensearch/analysis/common/CompoundAnalysisTests.java b/...s/analysis-common/src/test/java/org/opensearch/analysis/common/CompoundAnalysisTests.java
@@ -50,8 +50,12 @@
 import org.opensearch.test.IndexSettingsModule;
 import org.opensearch.test.OpenSearchTestCase;
 import org.hamcrest.MatcherAssert;
+import org.junit.Before;
 
 import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -63,17 +67,27 @@
 import static org.hamcrest.Matchers.instanceOf;
 
 public class CompoundAnalysisTests extends OpenSearchTestCase {
+
+    Settings[] settingsArr;
+
+    @Before
+    public void initialize() throws IOException {
+        final Path home = createTempDir();
+        copyHyphenationPatternsFile(home);
+        this.settingsArr = new Settings[] { getJsonSettings(home), getYamlSettings(home) };
+    }
+
     public void testDefaultsCompoundAnalysis() throws Exception {
-        Settings settings = getJsonSettings();
-        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
-        AnalysisModule analysisModule = createAnalysisModule(settings);
-        TokenFilterFactory filterFactory = analysisModule.getAnalysisRegistry().buildTokenFilterFactories(idxSettings).get("dict_dec");
-        MatcherAssert.assertThat(filterFactory, instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
+        for (Settings settings : this.settingsArr) {
+            IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
+            AnalysisModule analysisModule = createAnalysisModule(settings);
+            TokenFilterFactory filterFactory = analysisModule.getAnalysisRegistry().buildTokenFilterFactories(idxSettings).get("dict_dec");
+            MatcherAssert.assertThat(filterFactory, instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
+        }
     }
 
     public void testDictionaryDecompounder() throws Exception {
-        Settings[] settingsArr = new Settings[] { getJsonSettings(), getYamlSettings() };
-        for (Settings settings : settingsArr) {
+        for (Settings settings : this.settingsArr) {
             List<String> terms = analyze(settings, "decompoundingAnalyzer", "donaudampfschiff spargelcremesuppe");
             MatcherAssert.assertThat(terms.size(), equalTo(8));
             MatcherAssert.assertThat(
@@ -83,6 +97,26 @@ public void testDictionaryDecompounder() throws Exception {
         }
     }
 
+    // The hyphenation decompounder tests mirror the behavior of the corresponding Lucene tests in
+    // lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java
+    public void testHyphenationDecompounder() throws Exception {
+        for (Settings settings : this.settingsArr) {
+            List<String> terms = analyze(settings, "hyphenationAnalyzer", "min veninde som er lidt af en læsehest");
+            MatcherAssert.assertThat(terms.size(), equalTo(10));
+            MatcherAssert.assertThat(terms, hasItems("min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest"));
+        }
+    }
+
+    // The hyphenation decompounder tests mirror the behavior of the corresponding Lucene tests in
+    // lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java
+    public void testHyphenationDecompounderNoSubMatches() throws Exception {
+        for (Settings settings : this.settingsArr) {
+            List<String> terms = analyze(settings, "hyphenationAnalyzerNoSubMatches", "basketballkurv");
+            MatcherAssert.assertThat(terms.size(), equalTo(3));
+            MatcherAssert.assertThat(terms, hasItems("basketballkurv", "basketball", "kurv"));
+        }
+    }
+
     private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
         AnalysisModule analysisModule = createAnalysisModule(settings);
@@ -111,21 +145,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
         }));
     }
 
-    private Settings getJsonSettings() throws IOException {
+    private void copyHyphenationPatternsFile(Path home) throws IOException {
+        Path config = Files.createDirectory(home.resolve("config")); // createDirectory returns the directory it created
+        try (InputStream patterns = getClass().getResourceAsStream("da_UTF8.xml")) { // close the classpath stream after the copy
+            Files.copy(patterns, config.resolve("da_UTF8.xml"));
+        }
+    }
+
+    private Settings getJsonSettings(Path home) throws IOException {
         String json = "/org/opensearch/analysis/common/test1.json";
         return Settings.builder()
             .loadFromStream(json, getClass().getResourceAsStream(json), false)
             .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .put(Environment.PATH_HOME_SETTING.getKey(), home.toString())
             .build();
     }
 
-    private Settings getYamlSettings() throws IOException {
+    private Settings getYamlSettings(Path home) throws IOException {
         String yaml = "/org/opensearch/analysis/common/test1.yml";
         return Settings.builder()
             .loadFromStream(yaml, getClass().getResourceAsStream(yaml), false)
             .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .put(Environment.PATH_HOME_SETTING.getKey(), home.toString())
             .build();
     }
 }