forked from elastic/elasticsearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Expose Japanese completion filter to kuromoji analysis plugin (elastic#81858)
This adds analysis factories of JapaneseCompletionFilter and JapaneseCompletionAnalyzer (https://issues.apache.org/jira/browse/LUCENE-10102) to the kuromoji plugin.
- Loading branch information
Showing 6 changed files with 182 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
...n/java/org/elasticsearch/plugin/analysis/kuromoji/KuromojiCompletionAnalyzerProvider.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.plugin.analysis.kuromoji; | ||
|
||
import org.apache.lucene.analysis.ja.JapaneseCompletionAnalyzer; | ||
import org.apache.lucene.analysis.ja.JapaneseCompletionFilter.Mode; | ||
import org.apache.lucene.analysis.ja.dict.UserDictionary; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.env.Environment; | ||
import org.elasticsearch.index.IndexSettings; | ||
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; | ||
|
||
public class KuromojiCompletionAnalyzerProvider extends AbstractIndexAnalyzerProvider<JapaneseCompletionAnalyzer> { | ||
|
||
private final JapaneseCompletionAnalyzer analyzer; | ||
|
||
public KuromojiCompletionAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { | ||
super(indexSettings, name, settings); | ||
final UserDictionary userDictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings); | ||
final Mode mode = KuromojiCompletionFilterFactory.getMode(settings); | ||
analyzer = new JapaneseCompletionAnalyzer(userDictionary, mode); | ||
} | ||
|
||
@Override | ||
public JapaneseCompletionAnalyzer get() { | ||
return analyzer; | ||
} | ||
} |
46 changes: 46 additions & 0 deletions
46
...main/java/org/elasticsearch/plugin/analysis/kuromoji/KuromojiCompletionFilterFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.plugin.analysis.kuromoji; | ||
|
||
import org.apache.lucene.analysis.TokenStream; | ||
import org.apache.lucene.analysis.ja.JapaneseCompletionFilter; | ||
import org.apache.lucene.analysis.ja.JapaneseCompletionFilter.Mode; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.env.Environment; | ||
import org.elasticsearch.index.IndexSettings; | ||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; | ||
|
||
public class KuromojiCompletionFilterFactory extends AbstractTokenFilterFactory { | ||
|
||
private final Mode mode; | ||
|
||
public KuromojiCompletionFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { | ||
super(indexSettings, name, settings); | ||
mode = getMode(settings); | ||
} | ||
|
||
public static JapaneseCompletionFilter.Mode getMode(Settings settings) { | ||
JapaneseCompletionFilter.Mode mode = Mode.INDEX; | ||
String modeSetting = settings.get("mode", null); | ||
if (modeSetting != null) { | ||
if ("index".equalsIgnoreCase(modeSetting)) { | ||
mode = JapaneseCompletionFilter.Mode.INDEX; | ||
} else if ("query".equalsIgnoreCase(modeSetting)) { | ||
mode = JapaneseCompletionFilter.Mode.QUERY; | ||
} | ||
} | ||
return mode; | ||
} | ||
|
||
@Override | ||
public TokenStream create(TokenStream tokenStream) { | ||
return new JapaneseCompletionFilter(tokenStream, mode); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters