forked from opensearch-project/OpenSearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add kuromoji_completion analyzer and filter (opensearch-project#4835) (…
…opensearch-project#12287) * Add kuromoji_completion analyzer and filter (opensearch-project#4835) Signed-off-by: Tatsuya Kawakami <[email protected]> * Use INDEX mode if an invalid value is set for mode in the kuromoji_completion filter Signed-off-by: Tatsuya Kawakami <[email protected]> --------- Signed-off-by: Tatsuya Kawakami <[email protected]>
- Loading branch information
Showing
8 changed files
with
200 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
...omoji/src/main/java/org/opensearch/index/analysis/KuromojiCompletionAnalyzerProvider.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.index.analysis; | ||
|
||
import org.apache.lucene.analysis.ja.JapaneseCompletionAnalyzer; | ||
import org.apache.lucene.analysis.ja.JapaneseCompletionFilter; | ||
import org.apache.lucene.analysis.ja.dict.UserDictionary; | ||
import org.opensearch.common.settings.Settings; | ||
import org.opensearch.env.Environment; | ||
import org.opensearch.index.IndexSettings; | ||
|
||
public class KuromojiCompletionAnalyzerProvider extends AbstractIndexAnalyzerProvider<JapaneseCompletionAnalyzer> { | ||
|
||
private final JapaneseCompletionAnalyzer analyzer; | ||
|
||
public KuromojiCompletionAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { | ||
super(indexSettings, name, settings); | ||
final JapaneseCompletionFilter.Mode mode = KuromojiCompletionFilterFactory.getMode(settings); | ||
final UserDictionary userDictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings); | ||
analyzer = new JapaneseCompletionAnalyzer(userDictionary, mode); | ||
} | ||
|
||
@Override | ||
public JapaneseCompletionAnalyzer get() { | ||
return this.analyzer; | ||
} | ||
|
||
} |
42 changes: 42 additions & 0 deletions
42
...kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiCompletionFilterFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.index.analysis; | ||
|
||
import org.apache.lucene.analysis.TokenStream; | ||
import org.apache.lucene.analysis.ja.JapaneseCompletionFilter; | ||
import org.apache.lucene.analysis.ja.JapaneseCompletionFilter.Mode; | ||
import org.opensearch.common.settings.Settings; | ||
import org.opensearch.env.Environment; | ||
import org.opensearch.index.IndexSettings; | ||
|
||
public class KuromojiCompletionFilterFactory extends AbstractTokenFilterFactory { | ||
private final Mode mode; | ||
|
||
public KuromojiCompletionFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { | ||
super(indexSettings, name, settings); | ||
this.mode = getMode(settings); | ||
} | ||
|
||
public static Mode getMode(Settings settings) { | ||
String modeSetting = settings.get("mode", null); | ||
if (modeSetting != null) { | ||
if ("index".equalsIgnoreCase(modeSetting)) { | ||
return Mode.INDEX; | ||
} else if ("query".equalsIgnoreCase(modeSetting)) { | ||
return Mode.QUERY; | ||
} | ||
} | ||
return Mode.INDEX; | ||
} | ||
|
||
@Override | ||
public TokenStream create(TokenStream tokenStream) { | ||
return new JapaneseCompletionFilter(tokenStream, mode); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters