From 1439e37cab83e995892ea67b1868a1f43a181759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=CC=88rg=20Prante?= Date: Fri, 14 Nov 2014 23:59:13 +0100 Subject: [PATCH] update to Elasticsearch 1.4, refactoring --- README.md | 6 +- pom.xml | 14 +- .../action/langdetect/LangdetectRequest.java | 11 +- .../langdetect/LangdetectRequestBuilder.java | 5 + .../action/langdetect/LangdetectResponse.java | 5 +- .../langdetect/TransportLangdetectAction.java | 67 +--- .../common/langdetect/Detector.java | 308 ---------------- .../common/langdetect/LangProfile.java | 76 ---- .../common/langdetect/Messages.java | 21 -- .../common/langdetect/NGram.java | 173 --------- .../analysis/langdetect/LangProfile.java | 64 ++++ .../analysis}/langdetect/Language.java | 2 +- .../LanguageDetectionException.java | 4 +- .../index/analysis/langdetect/NGram.java | 239 ++++++++++++ .../mapper/langdetect/LangdetectMapper.java | 243 +++++++++--- .../module/langdetect/LangdetectModule.java | 1 - .../module/langdetect/LangdetectService.java | 347 ++++++++++++++++++ .../langdetect/RegisterLangdetectType.java | 7 +- .../plugin/langdetect/LangdetectPlugin.java | 4 +- .../langdetect/RestLangdetectAction.java | 5 +- .../elasticsearch/common => }/langdetect/af | 0 .../elasticsearch/common => }/langdetect/ar | 0 .../elasticsearch/common => }/langdetect/bg | 0 .../elasticsearch/common => }/langdetect/bn | 0 .../elasticsearch/common => }/langdetect/cs | 0 .../elasticsearch/common => }/langdetect/da | 0 .../elasticsearch/common => }/langdetect/de | 0 .../elasticsearch/common => }/langdetect/el | 0 .../elasticsearch/common => }/langdetect/en | 0 .../elasticsearch/common => }/langdetect/es | 0 .../elasticsearch/common => }/langdetect/et | 0 .../elasticsearch/common => }/langdetect/fa | 0 .../elasticsearch/common => }/langdetect/fi | 0 .../elasticsearch/common => }/langdetect/fr | 0 .../elasticsearch/common => }/langdetect/gu | 0 .../elasticsearch/common => }/langdetect/he | 0 .../elasticsearch/common => }/langdetect/hi | 0 .../elasticsearch/common => }/langdetect/hr | 0 .../elasticsearch/common => }/langdetect/hu | 0 .../elasticsearch/common => }/langdetect/id | 0 .../elasticsearch/common => }/langdetect/it | 0 .../elasticsearch/common => }/langdetect/ja | 0 .../elasticsearch/common => }/langdetect/kn | 0 .../elasticsearch/common => }/langdetect/ko | 0 src/main/resources/langdetect/language.json | 182 +++++++++ .../elasticsearch/common => }/langdetect/lt | 0 .../elasticsearch/common => }/langdetect/lv | 0 .../elasticsearch/common => }/langdetect/mk | 0 .../elasticsearch/common => }/langdetect/ml | 0 .../elasticsearch/common => }/langdetect/mr | 0 .../elasticsearch/common => }/langdetect/ne | 0 .../elasticsearch/common => }/langdetect/nl | 0 .../elasticsearch/common => }/langdetect/no | 0 .../elasticsearch/common => }/langdetect/pa | 0 .../elasticsearch/common => }/langdetect/pl | 0 .../elasticsearch/common => }/langdetect/pt | 0 .../elasticsearch/common => }/langdetect/ro | 0 .../elasticsearch/common => }/langdetect/ru | 0 .../elasticsearch/common => }/langdetect/sk | 0 .../elasticsearch/common => }/langdetect/sl | 0 .../elasticsearch/common => }/langdetect/so | 0 .../elasticsearch/common => }/langdetect/sq | 0 .../elasticsearch/common => }/langdetect/sv | 0 .../elasticsearch/common => }/langdetect/sw | 0 .../elasticsearch/common => }/langdetect/ta | 0 .../elasticsearch/common => }/langdetect/te | 0 .../elasticsearch/common => }/langdetect/th | 0 .../elasticsearch/common => }/langdetect/tl | 0 .../elasticsearch/common => }/langdetect/tr | 0 .../elasticsearch/common => }/langdetect/uk | 0 .../elasticsearch/common => }/langdetect/ur | 0 .../elasticsearch/common => }/langdetect/vi | 0 .../common => }/langdetect/zh-cn | 0 .../common => }/langdetect/zh-tw | 0 .../common/langdetect/languages.properties | 53 --- .../common/langdetect/messages.properties | 128 ------- .../common/langdetect/DetectorTest.java | 85 ----- .../common/langdetect/SimpleDetectorTest.java | 18 - .../langdetect/DetectLanguageTest.java | 22 +- .../index/mapper/langdetect/DetectorTest.java | 79 ++++ .../mapper}/langdetect/LangProfileTest.java | 22 +- .../langdetect/LangdetectMappingTest.java | 119 +++--- .../mapper}/langdetect/LanguageTest.java | 10 +- .../mapper}/langdetect/NGramTest.java | 8 +- .../mapper/langdetect/SimpleDetectorTest.java | 22 ++ src/test/resources/base64-mapping.json | 7 - src/test/resources/log4j.properties | 5 - src/test/resources/log4j2.xml | 13 + .../mapper/langdetect}/base64-decoded.txt | 0 .../mapper/langdetect/base64-mapping.json | 10 + .../index/mapper/langdetect}/base64.txt | 0 .../index/mapper/langdetect}/chinese.txt | 0 .../index/mapper/langdetect}/english.txt | 0 .../index/mapper/langdetect}/japanese.txt | 0 .../index/mapper/langdetect}/korean.txt | 0 .../mapper/langdetect}/simple-mapping.json | 0 96 files changed, 1291 insertions(+), 1094 deletions(-) delete mode 100644 src/main/java/org/xbib/elasticsearch/common/langdetect/Detector.java delete mode 100644 src/main/java/org/xbib/elasticsearch/common/langdetect/LangProfile.java delete mode 100644 src/main/java/org/xbib/elasticsearch/common/langdetect/Messages.java delete mode 100644 src/main/java/org/xbib/elasticsearch/common/langdetect/NGram.java create mode 100644 src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/LangProfile.java rename src/main/java/org/xbib/elasticsearch/{common => index/analysis}/langdetect/Language.java (93%) rename src/main/java/org/xbib/elasticsearch/{common => index/analysis}/langdetect/LanguageDetectionException.java (62%) create mode 100644 src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/NGram.java create mode 100644 src/main/java/org/xbib/elasticsearch/module/langdetect/LangdetectService.java rename src/main/java/org/xbib/elasticsearch/{plugin => module}/langdetect/RegisterLangdetectType.java (72%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/af (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/ar (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/bg (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/bn (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/cs (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/da (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/de (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/el (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/en (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/es (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/et (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/fa (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/fi (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/fr (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/gu (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/he (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/hi (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/hr (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/hu (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/id (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/it (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/ja (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/kn (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/ko (100%) create mode 100644 src/main/resources/langdetect/language.json rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/lt (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/lv (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/mk (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/ml (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/mr (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/ne (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/nl (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/no (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/pa (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/pl (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/pt (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/ro (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/ru (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/sk (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/sl (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/so (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/sq (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/sv (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/sw (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/ta (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/te (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/th (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/tl (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/tr (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/uk (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/ur (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/vi (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/zh-cn (100%) rename src/main/resources/{org/xbib/elasticsearch/common => }/langdetect/zh-tw (100%) delete mode 100644 src/main/resources/org/xbib/elasticsearch/common/langdetect/languages.properties delete mode 100644 src/main/resources/org/xbib/elasticsearch/common/langdetect/messages.properties delete mode 100644 src/test/java/org/xbib/elasticsearch/common/langdetect/DetectorTest.java delete mode 100644 src/test/java/org/xbib/elasticsearch/common/langdetect/SimpleDetectorTest.java rename src/test/java/org/xbib/elasticsearch/{common => index/mapper}/langdetect/DetectLanguageTest.java (57%) create mode 100644 src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/DetectorTest.java rename src/test/java/org/xbib/elasticsearch/{common => index/mapper}/langdetect/LangProfileTest.java (77%) rename src/test/java/org/xbib/elasticsearch/{module => index/mapper}/langdetect/LangdetectMappingTest.java (50%) rename src/test/java/org/xbib/elasticsearch/{common => index/mapper}/langdetect/LanguageTest.java (67%) rename src/test/java/org/xbib/elasticsearch/{common => index/mapper}/langdetect/NGramTest.java (96%) create mode 100644 src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/SimpleDetectorTest.java delete mode 100644 src/test/resources/base64-mapping.json delete mode 100644 src/test/resources/log4j.properties create mode 100644 src/test/resources/log4j2.xml rename src/test/resources/{ => org/xbib/elasticsearch/index/mapper/langdetect}/base64-decoded.txt (100%) create mode 100644 src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/base64-mapping.json rename src/test/resources/{ => org/xbib/elasticsearch/index/mapper/langdetect}/base64.txt (100%) rename src/test/resources/{ => org/xbib/elasticsearch/index/mapper/langdetect}/chinese.txt (100%) rename src/test/resources/{ => org/xbib/elasticsearch/index/mapper/langdetect}/english.txt (100%) rename src/test/resources/{ => org/xbib/elasticsearch/index/mapper/langdetect}/japanese.txt (100%) rename src/test/resources/{ => org/xbib/elasticsearch/index/mapper/langdetect}/korean.txt (100%) rename src/test/resources/{ => org/xbib/elasticsearch/index/mapper/langdetect}/simple-mapping.json (100%) diff --git a/README.md b/README.md index ee5f4fd..b01d65a 100644 --- a/README.md +++ b/README.md @@ -82,15 +82,16 @@ zh-tw ![Travis](https://travis-ci.org/jprante/elasticsearch-langdetect.png) -| Elasticsearch | Plugin | Release date | +| Elasticsearch | Plugin | Release date | | -------------- | -------------- | ------------ | +| 1.4.0 | 1.4.0.0 | Nov 14, 2014 | | 1.3.1 | 1.3.0.0 | Jul 30, 2014 | | 1.2.1 | 1.2.1.1 | Jun 18, 2014 | ## Installation - ./bin/plugin -install langdetect -url http://xbib.org/repository/org/xbib/elasticsearch/plugin/elasticsearch-langdetect/1.3.0.0/elasticsearch-langdetect-1.3.0.0-plugin.zip + ./bin/plugin -install langdetect -url http://xbib.org/repository/org/xbib/elasticsearch/plugin/elasticsearch-langdetect/1.4.0.0/elasticsearch-langdetect-1.4.0.0-plugin.zip Do not forget to restart the node after installing. @@ -98,6 +99,7 @@ Do not forget to restart the node after installing. | File | SHA1 | | --------------------------------------------- | -----------------------------------------| +| elasticsearch-langdetect-1.4.0.0-plugin.zip | f95361fa1a81b2681e2e9002b03ca6aad57f3012 | | elasticsearch-langdetect-1.3.0.0-plugin.zip | e2dd56c72f19cec861141becd8beb18d7bb26ee6 | | elasticsearch-langdetect-1.2.1.1-plugin.zip | cc3a0d5ccecf1210b96771dcb5c9935176e1cc35 | | elasticsearch-langdetect-1.2.1.0.zip | c07b84e798e284ee238554c2b2bc065dfdb9114d | diff --git a/pom.xml b/pom.xml index ee6372b..2c8a200 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ org.xbib.elasticsearch.plugin elasticsearch-langdetect - 1.3.0.0 + 1.4.0.0 jar @@ -69,7 +69,8 @@ github UTF-8 1.7 - 1.3.1 + 1.4.0 + 2.4.2 @@ -85,15 +86,15 @@ com.fasterxml.jackson.core jackson-databind - 2.4.1 + ${jackson.version} jar compile - org.testng - testng - 6.8.8 + junit + junit + 4.11 jar test @@ -141,7 +142,6 @@ maven-surefire-plugin 2.17 - false **/*Test.java diff --git a/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectRequest.java b/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectRequest.java index 5f123de..581373d 100644 --- a/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectRequest.java +++ b/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectRequest.java @@ -1,7 +1,6 @@ package org.xbib.elasticsearch.action.langdetect; import org.elasticsearch.action.support.single.custom.SingleCustomOperationRequest; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -9,29 +8,29 @@ public class LangdetectRequest extends SingleCustomOperationRequest { - BytesReference text; + String text; public LangdetectRequest() { } - public LangdetectRequest setText(BytesReference text) { + public LangdetectRequest setText(String text) { this.text = text; return this; } - public BytesReference getText() { + public String getText() { return text; } @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); - text = in.readBytesReference(); + text = in.readString(); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeBytesReference(text); + out.writeString(text); } } diff --git a/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectRequestBuilder.java b/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectRequestBuilder.java index dd56dce..30ce464 100644 --- a/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectRequestBuilder.java +++ b/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectRequestBuilder.java @@ -10,6 +10,11 @@ public LangdetectRequestBuilder(IndicesAdminClient client) { super(client, new LangdetectRequest()); } + public LangdetectRequestBuilder setText(String string) { + request.setText(string); + return this; + } + @Override protected void doExecute(ActionListener listener) { client.execute(LangdetectAction.INSTANCE, request, listener); diff --git a/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectResponse.java b/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectResponse.java index 3835120..bffe5a0 100644 --- a/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectResponse.java +++ b/src/main/java/org/xbib/elasticsearch/action/langdetect/LangdetectResponse.java @@ -4,7 +4,7 @@ import org.elasticsearch.common.xcontent.StatusToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.rest.RestStatus; -import org.xbib.elasticsearch.common.langdetect.Language; +import org.xbib.elasticsearch.index.analysis.langdetect.Language; import java.io.IOException; import java.util.List; @@ -18,8 +18,9 @@ public class LangdetectResponse extends ActionResponse implements StatusToXConte public LangdetectResponse() { } - public LangdetectResponse(List languages) { + public LangdetectResponse setLanguages(List languages) { this.languages = languages; + return this; } public List getLanguages() { diff --git a/src/main/java/org/xbib/elasticsearch/action/langdetect/TransportLangdetectAction.java b/src/main/java/org/xbib/elasticsearch/action/langdetect/TransportLangdetectAction.java index e3bd221..4ae64cc 100644 --- a/src/main/java/org/xbib/elasticsearch/action/langdetect/TransportLangdetectAction.java +++ b/src/main/java/org/xbib/elasticsearch/action/langdetect/TransportLangdetectAction.java @@ -1,64 +1,37 @@ package org.xbib.elasticsearch.action.langdetect; -import org.elasticsearch.action.support.single.custom.TransportSingleCustomOperationAction; -import org.elasticsearch.cluster.ClusterService; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.block.ClusterBlockException; -import org.elasticsearch.cluster.block.ClusterBlockLevel; -import org.elasticsearch.cluster.routing.ShardsIterator; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.TransportAction; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.threadpool.ThreadPool; -import org.elasticsearch.transport.TransportService; -import org.xbib.elasticsearch.common.langdetect.Detector; -import org.xbib.elasticsearch.common.langdetect.Language; +import org.xbib.elasticsearch.index.analysis.langdetect.Language; +import org.xbib.elasticsearch.index.analysis.langdetect.LanguageDetectionException; +import org.xbib.elasticsearch.module.langdetect.LangdetectService; import java.util.List; -public class TransportLangdetectAction extends TransportSingleCustomOperationAction { +public class TransportLangdetectAction extends TransportAction { - private final Detector detector; + private final LangdetectService service; @Inject public TransportLangdetectAction(Settings settings, ThreadPool threadPool, - ClusterService clusterService, TransportService transportService) { - super(settings, LangdetectAction.NAME, threadPool, clusterService, transportService); - this.detector = new Detector(settings); + ActionFilters actionFilters, LangdetectService service) { + super(settings, LangdetectAction.NAME, threadPool, actionFilters); + this.service = service; + // start the service here + this.service.start(); } @Override - protected String executor() { - return ThreadPool.Names.GENERIC; - } - - @Override - protected ShardsIterator shards(ClusterState state, LangdetectRequest request) { - return null; // execute always locally - } - - @Override - protected LangdetectRequest newRequest() { - return new LangdetectRequest(); - } - - @Override - protected LangdetectResponse newResponse() { - return new LangdetectResponse(); - } - - @Override - protected ClusterBlockException checkGlobalBlock(ClusterState state, LangdetectRequest request) { - return state.blocks().globalBlockedException(ClusterBlockLevel.READ); - } - - @Override - protected ClusterBlockException checkRequestBlock(ClusterState state, LangdetectRequest request) { - return null; // no blocks - } - - @Override - protected LangdetectResponse shardOperation(LangdetectRequest request, int shardId) { - List langs = detector.detectAll(request.getText().toUtf8()); - return new LangdetectResponse(langs); + protected void doExecute(LangdetectRequest request, ActionListener listener) { + try { + List langs = service.detectAll(request.getText()); + listener.onResponse(new LangdetectResponse().setLanguages(langs)); + } catch (LanguageDetectionException e) { + listener.onFailure(e); + } } } diff --git a/src/main/java/org/xbib/elasticsearch/common/langdetect/Detector.java b/src/main/java/org/xbib/elasticsearch/common/langdetect/Detector.java deleted file mode 100644 index 5eb5652..0000000 --- a/src/main/java/org/xbib/elasticsearch/common/langdetect/Detector.java +++ /dev/null @@ -1,308 +0,0 @@ -package org.xbib.elasticsearch.common.langdetect; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.common.component.AbstractLifecycleComponent; -import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.common.settings.Settings; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.ResourceBundle; -import java.util.regex.Pattern; - -public class Detector extends AbstractLifecycleComponent { - - private static final double ALPHA_DEFAULT = 0.5; - - private static final double ALPHA_WIDTH = 0.05; - - private static final int ITERATION_LIMIT = 1000; - - private static final double PROB_THRESHOLD = 0.1; - - private static final double CONV_THRESHOLD = 0.99999; - - private static final int BASE_FREQ = 10000; - - private static final String UNKNOWN_LANG = "unknown"; - - private Map wordLangProbMap = new HashMap(); - - private List langlist = new LinkedList(); - - private double alpha; - - private int n_trial; - - private double[] priorMap; - - public Detector() { - super(ImmutableSettings.EMPTY); - } - - @Inject - public Detector(Settings settings) { - super(settings); - try { - loadDefaultProfiles(); - } catch (IOException e) { - throw new ElasticsearchException(e.getMessage()); - } - reset(); - } - - @Override - protected void doStart() throws ElasticsearchException { - } - - @Override - protected void doStop() throws ElasticsearchException { - } - - @Override - protected void doClose() throws ElasticsearchException { - } - - public void loadDefaultProfiles() throws IOException { - load(ResourceBundle.getBundle(getClass().getPackage().getName() + ".languages")); - reset(); - } - - public void loadProfiles(String bundleName) throws IOException { - load(ResourceBundle.getBundle(bundleName)); - } - - public void load(ResourceBundle bundle) throws IOException { - Enumeration en = bundle.getKeys(); - int index = 0; - int size = bundle.keySet().size(); - while (en.hasMoreElements()) { - String line = en.nextElement(); - InputStream in = getClass().getResourceAsStream(line); - if (in == null) { - throw new IOException("i/o error in profile locading"); - } - loadProfile(in, index++, size); - } - } - - public void loadProfile(InputStream in, int index, int langsize) throws IOException { - ObjectMapper mapper = new ObjectMapper(); - LangProfile profile = mapper.readValue(in, LangProfile.class); - addProfile(profile, index, langsize); - } - - public void addProfile(LangProfile profile, int index, int langsize) throws IOException { - String lang = profile.name; - if (langlist.contains(lang)) { - throw new IOException("duplicate the same language profile"); - } - langlist.add(lang); - for (String word : profile.freq.keySet()) { - if (!wordLangProbMap.containsKey(word)) { - wordLangProbMap.put(word, new double[langsize]); - } - int length = word.length(); - if (length >= 1 && length <= 3) { - double prob = profile.freq.get(word).doubleValue() / profile.n_words[length - 1]; - wordLangProbMap.get(word)[index] = prob; - } - } - } - - public Detector setWordLangProbMap(Map wordLangProbMap) { - this.wordLangProbMap = wordLangProbMap; - return this; - } - - public Detector setLangList(List langlist) { - this.langlist = langlist; - return this; - } - - public List getLangList() { - return Collections.unmodifiableList(langlist); - } - - public final void reset() { - this.priorMap = null; - this.alpha = ALPHA_DEFAULT; - this.n_trial = 7; - } - - /** - * Set smoothing parameter. The default value is 0.5(i.e. Expected - * Likelihood Estimate). - * - * @param alpha the smoothing parameter - */ - public void setAlpha(double alpha) { - this.alpha = alpha; - } - - /** - * Set prior information about language probabilities. - * - * @param priorMap the priorMap to set - * @throws LanguageDetectionException - */ - public void setPriorMap(HashMap priorMap) throws LanguageDetectionException { - this.priorMap = new double[langlist.size()]; - double sump = 0; - for (int i = 0; i < this.priorMap.length; ++i) { - String lang = langlist.get(i); - if (priorMap.containsKey(lang)) { - double p = priorMap.get(lang); - if (p < 0) { - throw new LanguageDetectionException("Prior probability must be non-negative"); - } - this.priorMap[i] = p; - sump += p; - } - } - if (sump <= 0) { - throw new LanguageDetectionException("More one of prior probability must be non-zero"); - } - for (int i = 0; i < this.priorMap.length; ++i) { - this.priorMap[i] /= sump; - } - } - - private final static Pattern word = Pattern.compile("[\\P{IsWord}]", Pattern.UNICODE_CHARACTER_CLASS); - - /** - * Detect language of the target text and return the language name which has - * the highest probability. - * - * @return detected language name which has most probability. - */ - public String detect(String text) { - List probabilities = - detectAll(text.replaceAll(word.pattern(), " ")); - //detectAll(normalize(text)); - if (probabilities.size() > 0) { - return probabilities.get(0).getLanguage(); - } - return UNKNOWN_LANG; - } - - public List detectAll(String text) { - return sortProbability(detectBlock(/*normalize(text)*/text.replaceAll(word.pattern(), " "))); - } - - private static final double[] NO_DOUBLE = {}; - - private double[] detectBlock(String text) { - //text = clean(text); - List ngrams = extractNGrams(text); - // we should allow empty text field, anyway never raise an exception during indexing process - // since UNKNOWN_LANG exists - if (ngrams.isEmpty()) { - return NO_DOUBLE; - } - double[] langprob = new double[langlist.size()]; - Random rand = new Random(); - rand.setSeed(0L); - - for (int t = 0; t < n_trial; ++t) { - double[] prob = initProbability(); - double a = this.alpha + rand.nextGaussian() * ALPHA_WIDTH; - for (int i = 0; ; ++i) { - int r = rand.nextInt(ngrams.size()); - updateLangProb(prob, ngrams.get(r), a); - if (i % 5 == 0) { - if (normalizeProb(prob) > CONV_THRESHOLD || i >= ITERATION_LIMIT) { - break; - } - } - } - for (int j = 0; j < langprob.length; ++j) { - langprob[j] += prob[j] / n_trial; - } - } - return langprob; - } - - private double[] initProbability() { - double[] prob = new double[langlist.size()]; - if (priorMap != null) { - for (int i = 0; i < prob.length; ++i) { - prob[i] = priorMap[i]; - } - } else { - for (int i = 0; i < prob.length; ++i) { - prob[i] = 1.0 / langlist.size(); - } - } - return prob; - } - - private List extractNGrams(String text) { - List list = new ArrayList(); - NGram ngram = new NGram(); - for (int i = 0; i < text.length(); ++i) { - ngram.addChar(text.charAt(i)); - for (int n = 1; n <= NGram.N_GRAM; ++n) { - String w = ngram.get(n); - if (w != null && wordLangProbMap.containsKey(w)) { - list.add(w); - } - } - } - return list; - } - - private boolean updateLangProb(double[] prob, String word, double alpha) { - if (word == null || !wordLangProbMap.containsKey(word)) { - return false; - } - double[] langProbMap = wordLangProbMap.get(word); - double weight = alpha / BASE_FREQ; - for (int i = 0; i < prob.length; ++i) { - prob[i] *= weight + langProbMap[i]; - } - return true; - } - - private double normalizeProb(double[] prob) { - double maxp = 0, sump = 0; - for (int i = 0; i < prob.length; ++i) { - sump += prob[i]; - } - for (int i = 0; i < prob.length; ++i) { - double p = prob[i] / sump; - if (maxp < p) { - maxp = p; - } - prob[i] = p; - } - return maxp; - } - - private List sortProbability(double[] prob) { - List list = new ArrayList(); - for (int j = 0; j < prob.length; ++j) { - double p = prob[j]; - if (p > PROB_THRESHOLD) { - for (int i = 0; i <= list.size(); ++i) { - if (i == list.size() || list.get(i).getProbability() < p) { - list.add(i, new Language(langlist.get(j), p)); - break; - } - } - } - } - return list; - } - -} diff --git a/src/main/java/org/xbib/elasticsearch/common/langdetect/LangProfile.java b/src/main/java/org/xbib/elasticsearch/common/langdetect/LangProfile.java deleted file mode 100644 index 7bc8472..0000000 --- a/src/main/java/org/xbib/elasticsearch/common/langdetect/LangProfile.java +++ /dev/null @@ -1,76 +0,0 @@ -package org.xbib.elasticsearch.common.langdetect; - -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; - -public class LangProfile { - - private static final int MINIMUM_FREQ = 2; - - private static final int LESS_FREQ_RATIO = 100000; - - public String name = null; - - public Map freq = new HashMap(); - - public int[] n_words = new int[NGram.N_GRAM]; - - public LangProfile() { - } - - public LangProfile(String name) { - this.name = name; - } - - public void add(String gram) { - if (name == null || gram == null) { - return; - } - int len = gram.length(); - if (len < 1 || len > NGram.N_GRAM) { - return; - } - ++n_words[len - 1]; - if (freq.containsKey(gram)) { - freq.put(gram, freq.get(gram) + 1); - } else { - freq.put(gram, 1); - } - } - - public void omitLessFreq() { - if (name == null) { - return; - } - int threshold = n_words[0] / LESS_FREQ_RATIO; - if (threshold < MINIMUM_FREQ) { - threshold = MINIMUM_FREQ; - } - Set keys = freq.keySet(); - int roman = 0; - for (Iterator i = keys.iterator(); i.hasNext(); ) { - String key = i.next(); - int count = freq.get(key); - if (count <= threshold) { - n_words[key.length() - 1] -= count; - i.remove(); - } else { - if (key.matches("^[A-Za-z]$")) { - roman += count; - } - } - } - if (roman < n_words[0] / 3) { - Set keys2 = freq.keySet(); - for (Iterator i = keys2.iterator(); i.hasNext(); ) { - String key = i.next(); - if (key.matches(".*[A-Za-z].*")) { - n_words[key.length() - 1] -= freq.get(key); - i.remove(); - } - } - } - } -} diff --git a/src/main/java/org/xbib/elasticsearch/common/langdetect/Messages.java b/src/main/java/org/xbib/elasticsearch/common/langdetect/Messages.java deleted file mode 100644 index 4e71e79..0000000 --- a/src/main/java/org/xbib/elasticsearch/common/langdetect/Messages.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.xbib.elasticsearch.common.langdetect; - -import java.util.MissingResourceException; -import java.util.ResourceBundle; - -public class Messages { - - private static final ResourceBundle RESOURCE_BUNDLE = - ResourceBundle.getBundle(Messages.class.getPackage().getName() + ".messages"); - - private Messages() { - } - - public static String getString(String key) { - try { - return RESOURCE_BUNDLE.getString(key); - } catch (MissingResourceException e) { - return '!' + key + '!'; - } - } -} diff --git a/src/main/java/org/xbib/elasticsearch/common/langdetect/NGram.java b/src/main/java/org/xbib/elasticsearch/common/langdetect/NGram.java deleted file mode 100644 index ddda5aa..0000000 --- a/src/main/java/org/xbib/elasticsearch/common/langdetect/NGram.java +++ /dev/null @@ -1,173 +0,0 @@ -package org.xbib.elasticsearch.common.langdetect; - -import java.lang.Character.UnicodeBlock; -import java.util.HashMap; -import java.util.Map; - -public class NGram { - - public final static Map cjk_map = new HashMap(); - - public final static int N_GRAM = 3; - - private final static String LATIN1_EXCLUDED = Messages.getString("NGram.LATIN1_EXCLUDE"); - - private StringBuilder grams; - - private boolean capitalword; - - public NGram() { - grams = new StringBuilder(" "); - capitalword = false; - } - - public void addChar(char ch) { - ch = normalize(ch); - char lastchar = grams.charAt(grams.length() - 1); - if (lastchar == ' ') { - grams = new StringBuilder(" "); - capitalword = false; - if (ch == ' ') { - return; - } - } else if (grams.length() >= N_GRAM) { - grams.deleteCharAt(0); - } - grams.append(ch); - if (Character.isUpperCase(ch)) { - if (Character.isUpperCase(lastchar)) { - capitalword = true; - } - } else { - capitalword = false; - } - } - - public String get(int n) { - if (capitalword) { - return null; - } - int len = grams.length(); - if (n < 1 || n > 3 || len < n) { - return null; - } - if (n == 1) { - char ch = grams.charAt(len - 1); - if (ch == ' ') { - return null; - } - return Character.toString(ch); - } else { - return grams.substring(len - n, len); - } - } - - public static char normalize(char ch) { - Character.UnicodeBlock block = Character.UnicodeBlock.of(ch); - if (block == UnicodeBlock.BASIC_LATIN) { - if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') { - ch = ' '; - } - } else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) { - if (LATIN1_EXCLUDED.indexOf(ch) >= 0) { - ch = ' '; - } - } else if (block == UnicodeBlock.GENERAL_PUNCTUATION) { - ch = ' '; - } else if (block == UnicodeBlock.ARABIC) { - if (ch == '\u06cc') { - ch = '\u064a'; - } - } else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) { - if (ch >= '\u1ea0') { - ch = '\u1ec3'; - } - } else if (block == UnicodeBlock.HIRAGANA) { - ch = '\u3042'; - } else if (block == UnicodeBlock.KATAKANA) { - ch = '\u30a2'; - } else if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) { - ch = '\u3105'; - } else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) { - if (cjk_map.containsKey(ch)) { - ch = cjk_map.get(ch); - } - } else if (block == UnicodeBlock.HANGUL_SYLLABLES) { - ch = '\uac00'; - } - return ch; - } - - static final String[] CJK_CLASS = {Messages.getString("NGram.KANJI_1_0"), Messages.getString("NGram.KANJI_1_2"), - Messages.getString("NGram.KANJI_1_4"), Messages.getString("NGram.KANJI_1_8"), - Messages.getString("NGram.KANJI_1_11"), Messages.getString("NGram.KANJI_1_12"), - Messages.getString("NGram.KANJI_1_13"), Messages.getString("NGram.KANJI_1_14"), - Messages.getString("NGram.KANJI_1_16"), Messages.getString("NGram.KANJI_1_18"), - Messages.getString("NGram.KANJI_1_22"), Messages.getString("NGram.KANJI_1_27"), - Messages.getString("NGram.KANJI_1_29"), Messages.getString("NGram.KANJI_1_31"), - Messages.getString("NGram.KANJI_1_35"), Messages.getString("NGram.KANJI_2_0"), - Messages.getString("NGram.KANJI_2_1"), Messages.getString("NGram.KANJI_2_4"), - Messages.getString("NGram.KANJI_2_9"), Messages.getString("NGram.KANJI_2_10"), - Messages.getString("NGram.KANJI_2_11"), Messages.getString("NGram.KANJI_2_12"), - Messages.getString("NGram.KANJI_2_13"), Messages.getString("NGram.KANJI_2_15"), - Messages.getString("NGram.KANJI_2_16"), Messages.getString("NGram.KANJI_2_18"), - Messages.getString("NGram.KANJI_2_21"), Messages.getString("NGram.KANJI_2_22"), - Messages.getString("NGram.KANJI_2_23"), Messages.getString("NGram.KANJI_2_28"), - Messages.getString("NGram.KANJI_2_29"), Messages.getString("NGram.KANJI_2_30"), - Messages.getString("NGram.KANJI_2_31"), Messages.getString("NGram.KANJI_2_32"), - Messages.getString("NGram.KANJI_2_35"), Messages.getString("NGram.KANJI_2_36"), - Messages.getString("NGram.KANJI_2_37"), Messages.getString("NGram.KANJI_2_38"), - Messages.getString("NGram.KANJI_3_1"), Messages.getString("NGram.KANJI_3_2"), - Messages.getString("NGram.KANJI_3_3"), Messages.getString("NGram.KANJI_3_4"), - Messages.getString("NGram.KANJI_3_5"), Messages.getString("NGram.KANJI_3_8"), - Messages.getString("NGram.KANJI_3_9"), Messages.getString("NGram.KANJI_3_11"), - Messages.getString("NGram.KANJI_3_12"), Messages.getString("NGram.KANJI_3_13"), - Messages.getString("NGram.KANJI_3_15"), Messages.getString("NGram.KANJI_3_16"), - Messages.getString("NGram.KANJI_3_18"), Messages.getString("NGram.KANJI_3_19"), - Messages.getString("NGram.KANJI_3_22"), Messages.getString("NGram.KANJI_3_23"), - Messages.getString("NGram.KANJI_3_27"), Messages.getString("NGram.KANJI_3_29"), - Messages.getString("NGram.KANJI_3_30"), Messages.getString("NGram.KANJI_3_31"), - Messages.getString("NGram.KANJI_3_32"), Messages.getString("NGram.KANJI_3_35"), - Messages.getString("NGram.KANJI_3_36"), Messages.getString("NGram.KANJI_3_37"), - Messages.getString("NGram.KANJI_3_38"), Messages.getString("NGram.KANJI_4_0"), - Messages.getString("NGram.KANJI_4_9"), Messages.getString("NGram.KANJI_4_10"), - Messages.getString("NGram.KANJI_4_16"), Messages.getString("NGram.KANJI_4_17"), - Messages.getString("NGram.KANJI_4_18"), Messages.getString("NGram.KANJI_4_22"), - Messages.getString("NGram.KANJI_4_24"), Messages.getString("NGram.KANJI_4_28"), - Messages.getString("NGram.KANJI_4_34"), Messages.getString("NGram.KANJI_4_39"), - Messages.getString("NGram.KANJI_5_10"), Messages.getString("NGram.KANJI_5_11"), - Messages.getString("NGram.KANJI_5_12"), Messages.getString("NGram.KANJI_5_13"), - Messages.getString("NGram.KANJI_5_14"), Messages.getString("NGram.KANJI_5_18"), - Messages.getString("NGram.KANJI_5_26"), Messages.getString("NGram.KANJI_5_29"), - Messages.getString("NGram.KANJI_5_34"), Messages.getString("NGram.KANJI_5_39"), - Messages.getString("NGram.KANJI_6_0"), Messages.getString("NGram.KANJI_6_3"), - Messages.getString("NGram.KANJI_6_9"), Messages.getString("NGram.KANJI_6_10"), - Messages.getString("NGram.KANJI_6_11"), Messages.getString("NGram.KANJI_6_12"), - Messages.getString("NGram.KANJI_6_16"), Messages.getString("NGram.KANJI_6_18"), - Messages.getString("NGram.KANJI_6_20"), Messages.getString("NGram.KANJI_6_21"), - Messages.getString("NGram.KANJI_6_22"), Messages.getString("NGram.KANJI_6_23"), - Messages.getString("NGram.KANJI_6_25"), Messages.getString("NGram.KANJI_6_28"), - Messages.getString("NGram.KANJI_6_29"), Messages.getString("NGram.KANJI_6_30"), - Messages.getString("NGram.KANJI_6_32"), Messages.getString("NGram.KANJI_6_34"), - Messages.getString("NGram.KANJI_6_35"), Messages.getString("NGram.KANJI_6_37"), - Messages.getString("NGram.KANJI_6_39"), Messages.getString("NGram.KANJI_7_0"), - Messages.getString("NGram.KANJI_7_3"), Messages.getString("NGram.KANJI_7_6"), - Messages.getString("NGram.KANJI_7_7"), Messages.getString("NGram.KANJI_7_9"), - Messages.getString("NGram.KANJI_7_11"), Messages.getString("NGram.KANJI_7_12"), - Messages.getString("NGram.KANJI_7_13"), Messages.getString("NGram.KANJI_7_16"), - Messages.getString("NGram.KANJI_7_18"), Messages.getString("NGram.KANJI_7_19"), - Messages.getString("NGram.KANJI_7_20"), Messages.getString("NGram.KANJI_7_21"), - Messages.getString("NGram.KANJI_7_23"), Messages.getString("NGram.KANJI_7_25"), - Messages.getString("NGram.KANJI_7_28"), Messages.getString("NGram.KANJI_7_29"), - Messages.getString("NGram.KANJI_7_32"), Messages.getString("NGram.KANJI_7_33"), - Messages.getString("NGram.KANJI_7_35"), Messages.getString("NGram.KANJI_7_37"),}; - - static { - for (String cjk_list : CJK_CLASS) { - char representative = cjk_list.charAt(0); - for (int i = 0; i < cjk_list.length(); ++i) { - cjk_map.put(cjk_list.charAt(i), representative); - } - } - } -} diff --git a/src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/LangProfile.java b/src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/LangProfile.java new file mode 100644 index 0000000..9eb4ea4 --- /dev/null +++ b/src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/LangProfile.java @@ -0,0 +1,64 @@ +package org.xbib.elasticsearch.index.analysis.langdetect; + +import java.util.HashMap; +import java.util.Map; + +/** + * This class is used by ObjectMapper, it requires public attributes + */ +public class LangProfile { + + public String name = null; + + public Map freq = new HashMap(); + + public int[] n_words = new int[NGram.N_GRAM]; + + public LangProfile() { + } + + public LangProfile(String name) { + this.name = name; + } + + public void add(String gram) { + if (name == null || gram == null) { + return; + } + int len = gram.length(); + if (len < 1 || len > NGram.N_GRAM) { + return; + } + ++n_words[len - 1]; + if (freq.containsKey(gram)) { + freq.put(gram, freq.get(gram) + 1); + } else { + freq.put(gram, 1); + } + } + + public void setName(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public void setNWords() { + this.n_words = n_words; + } + + public int[] getNWords() { + return n_words; + } + + public void setFreq(Map freq) { + this.freq = freq; + } + + public Map getFreq() { + return freq; + } + +} diff --git a/src/main/java/org/xbib/elasticsearch/common/langdetect/Language.java b/src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/Language.java similarity index 93% rename from src/main/java/org/xbib/elasticsearch/common/langdetect/Language.java rename to src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/Language.java index 6a998bf..ece8c40 100644 --- a/src/main/java/org/xbib/elasticsearch/common/langdetect/Language.java +++ b/src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/Language.java @@ -1,4 +1,4 @@ -package org.xbib.elasticsearch.common.langdetect; +package org.xbib.elasticsearch.index.analysis.langdetect; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; diff --git a/src/main/java/org/xbib/elasticsearch/common/langdetect/LanguageDetectionException.java b/src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/LanguageDetectionException.java similarity index 62% rename from src/main/java/org/xbib/elasticsearch/common/langdetect/LanguageDetectionException.java rename to src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/LanguageDetectionException.java index daadf3e..15b2e3a 100644 --- a/src/main/java/org/xbib/elasticsearch/common/langdetect/LanguageDetectionException.java +++ b/src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/LanguageDetectionException.java @@ -1,10 +1,10 @@ -package org.xbib.elasticsearch.common.langdetect; +package org.xbib.elasticsearch.index.analysis.langdetect; import java.io.IOException; public class LanguageDetectionException extends IOException { - private final static long serialVersionUID = -1L; + private static final long serialVersionUID = -1; public LanguageDetectionException(String message) { super(message); diff --git a/src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/NGram.java b/src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/NGram.java new file mode 100644 index 0000000..ce120b9 --- /dev/null +++ b/src/main/java/org/xbib/elasticsearch/index/analysis/langdetect/NGram.java @@ -0,0 +1,239 @@ +package org.xbib.elasticsearch.index.analysis.langdetect; + +import java.lang.Character.UnicodeBlock; +import java.util.HashMap; +import java.util.Map; + +public class NGram { + + public final static Map cjk_map = new HashMap(); + + public final static int N_GRAM = 3; + + private final static String LATIN1_EXCLUDED = "\u00A0\u00AB\u00B0\u00BB"; + + private StringBuilder grams; + + private boolean capitalword; + + public NGram() { + grams = new StringBuilder(" "); + capitalword = false; + } + + public void addChar(char ch) { + ch = normalize(ch); + char lastchar = grams.charAt(grams.length() - 1); + if (lastchar == ' ') { + grams = new StringBuilder(" "); + capitalword = false; + if (ch == ' ') { + return; + } + } else if (grams.length() >= N_GRAM) { + grams.deleteCharAt(0); + } + grams.append(ch); + if (Character.isUpperCase(ch)) { + if (Character.isUpperCase(lastchar)) { + capitalword = true; + } + } else { + capitalword = false; + } + } + + public String get(int n) { + if (capitalword) { + return null; + } + int len = grams.length(); + if (n < 1 || n > 3 || len < n) { + return null; + } + if (n == 1) { + char ch = grams.charAt(len - 1); + if (ch == ' ') { + return null; + } + return Character.toString(ch); + } else { + return grams.substring(len - n, len); + } + } + + public static char normalize(char ch) { + Character.UnicodeBlock block = Character.UnicodeBlock.of(ch); + if (block == UnicodeBlock.BASIC_LATIN) { + if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') { + ch = ' '; + } + } else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) { + if (LATIN1_EXCLUDED.indexOf(ch) >= 0) { + ch = ' '; + } + } else if (block == UnicodeBlock.GENERAL_PUNCTUATION) { + ch = ' '; + } else if (block == UnicodeBlock.ARABIC) { + if (ch == '\u06cc') { + ch = '\u064a'; + } + } else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) { + if (ch >= '\u1ea0') { + ch = '\u1ec3'; + } + } else if (block == UnicodeBlock.HIRAGANA) { + ch = '\u3042'; + } else if (block == UnicodeBlock.KATAKANA) { + ch = '\u30a2'; + } else if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) { + ch = '\u3105'; + } else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) { + if (cjk_map.containsKey(ch)) { + ch = cjk_map.get(ch); + } + } else if (block == UnicodeBlock.HANGUL_SYLLABLES) { + ch = '\uac00'; + } + return ch; + } + + static final String[] CJK_CLASS = { + "\u4F7C\u6934", + "\u88CF\u95B2", + "\u7027\u7DCB", + "\u4E80\u4E9C\u4EEE\u5263\u5264\u5270\u52C5\u52E7\u52F2\u53B3\u5449\u58CA\u58CC\u5968\u59C9\u59EB\u5D8B\u5DE3\u5E30\u6075\u622F\u623B\u6255\u629C\u629E\u62DD\u62E1\u633F\u635C\u63FA\u6442\u6589\u658E\u6669\u66A6\u66FD\u6804\u685C\u6B69\u6B6F\u6BBB\u6C37\u6C5A\u6D44\u6E09\u6E0B\u6E13\u6EDD\u713C\u72A0\u731F\u7363\u7A32\u7A42\u7A93\u7ADC\u7C8B\u7C9B\u7DD1\u7E01\u7E04\u7E26\u7E4A\u7E4B\u7E70\u8074\u8107\u8133\u81D3\u820E\u8217\u8358\u83D3\u85AC\u8987\u899A\u8B21\u8B72\u8B83\u8CDB\u9045\u90F7\u91C8\u9271\u9283\u92AD\u9665\u967A\u96A0\u96A3\u96B7\u970A\u983C\u9854\u9855\u99C6\u9A12\u9ED9\u9F62", + "\u67D8\u831C", + "\u5742\u57FC\u5800", + "\u4E3C\u4E98\u4FE3\u4FF5\u5072\u51A8\u53A9\u5451\u546A\u5504\u5516\u55A9\u55B0\u5618\u5642\u565B\u567A\u56A2\u57F4\u5840\u5841\u58F1\u59F6\u5A2F\u5B22\u5B8D\u5DCC\u5EFB\u5F10\u60A9\u60E3\u61D0\u62F6\u63B4\u63BB\u63C3\u6681\u685F\u6955\u6962\u696F\u698A\u698E\u69FB\u6A2B\u6A7F\u6B53\u6BD8\u6D99\u6E07\u7460\u7473\u7560\u7573\u758E\u7690\u7815\u783A\u7962\u7A4F\u7A63\u7AEA\u7BED\u7CA7\u7D18\u7D3A\u7E4D\u8061\u8218\u8276\u82C5\u8597\u85AB\u86CD\u874B\u88FE\u8ACF\u8B90\u8D0B\u8FBF\u9013\u9061\u914E\u9154\u918D\u9190\u91A4\u91B8\u9262\u929A\u92ED\u92F3\u932C\u96EB\u96F0\u976D\u97EE\u981A\u99C4\u9A28\u9AC4\u9B8E\u9C10\u9D0E\u9D5C\u9D8F\u9E78\u9EB9\u9EBA\u9EBF", + "\u5F66\u7984\u7985", + "\u5861\u7B25\u844E\u9419\u9D07", + "\u5039\u514E\u51E7\u51EA\u5301\u5302\u5859\u58F7\u59AC\u5C2D\u5CA8\u5EFC\u6357\u64B9\u67CA\u6802\u6834\u68BC\u6900\u6919\u691B\u69D9\u6AE8\u6D9C\u6E8C\u6F09\u6F45\u701E\u7026\u7114\u72DB\u7577\u75E9\u783F\u7895\u7A50\u7AC3\u7B48\u7B86\u7BAA\u7C7E\u7C82\u7C8D\u7CCE\u7D2C\u7F6B\u7FEB\u8557\u85AE\u86CE\u877F\u8997\u8ACC\u8CB0\u8CCE\u8FE9\u9197\u920E\u9266\u927E\u92F2\u9306\u9453\u9784\u982C\u9834\u99C8\u9BF5\u9C2F\u9D2C", + "\u6762\u6A17\u887F", + "\u4E21\u4E57\u4ECF\u4F1D\u4FA1\u4FF3\u5024\u50CD\u5150\u5186\u51E6\u52B4\u52B9\u5358\u53CE\u55B6\u56E3\u56F2\u56F3\u570F\u5727\u5869\u5897\u58F2\u5909\u5B9F\u5BDB\u5BFE\u5C02\u5DFB\u5E2F\u5E81\u5E83\u5EC3\u5F3E\u5F93\u5FB3\u5FB4\u5FDC\u60AA\u6226\u6238\u6271\u62E0\u6319\u63B2\u6483\u64AE\u67A0\u67FB\u691C\u697D\u69D8\u6A29\u6B73\u6B74\u6BCE\u6C17\u6CA2\u6D5C\u6E08\u6E80\u702C\u7523\u767A\u770C\u7D4C\u7D75\u7D76\u7D99\u7D9A\u7DCF\u8535\u8846\u89A7\u89B3\u8A33\u8AAC\u8AAD\u8C4A\u8EE2\u8EFD\u8FBA\u8FBC\u9244\u9332\u95A2\u95D8\u96D1\u99C5\u9A13\u9ED2", + "\u4F0E\u4FFA\u5036\u53E1\u54B2\u5506\u583A\u5C3B\u5CAC\u5CE0\u5CEF\u6803\u68B6\u6A0B\u6A8E\u73C2\u7551\u7826\u7881\u79B0\u7B39\u8429\u8599\u8FBB\u9162\u95C7\u9688\u96BC\u9AEA\u9DF2", + "\u5553\u938C", + "\u51B4\u564C\u57DC\u5B2C\u6822\u685D\u690B\u6973\u6C93\u7511\u7887\u7A17\u83D6\u847A\u8494\u8526\u854E\u85C1\u86F8\u88B4\u93A7\u9B92\u9C39\u9C48\u9C52", + "\u4E2B\u4EC3\u4F09\u4F57\u4F6F\u4F70\u4FD1\u4FDA\u500C\u5043\u516E\u5189\u5241\u530D\u5310\u5412\u54AB\u54AF\u5514\u5556\u55B1\u561F\u573B\u586D\u587D\u58C5\u58D1\u5914\u5A62\u5A6A\u5AE6\u5B40\u5B5B\u5B70\u5BB8\u5CD2\u5D01\u5D34\u5E11\u5EA0\u5F0B\u5F2D\u5F87\u607F\u621B\u6221\u6289\u63A3\u6452\u646D\u64D8\u652B\u6600\u6631\u6641\u66F7\u6773\u67B8\u67DD\u67DE\u6829\u68FB\u69AD\u6A47\u6C10\u6C68\u6C74\u6C85\u6CD3\u6D31\u6D93\u6D94\u6DB8\u6DBF\u6DC5\u6E6E\u6EA7\u6EB4\u6EC2\u6F2A\u6F2F\u6FB9\u6FC2\u6FDB\u6FEE\u70AF\u70FD\u7166\u726F\u729B\u739F\u73DE\u740A\u746D\u749C\u749F\u74E0\u759D\u75A3\u75CD\u75DE\u7600\u7620\u7688\u7738\u7762\u776B\u777D\u77E3\u781D\u7837\u78A3\u7946\u7B60\u7F44\u7F54\u7F5F\u7FAF\u8026\u807F\u80C4\u80DB\u80ED\u81E7\u824B\u82B7\u82E3\u8392\u846D\u84D3\u8548\u85B9\u86DE\u873F\u8753\u8782\u87AB\u87B3\u87D1\u87E0\u87FE\u8821\u88D8\u88E8\u8913\u891A\u892B\u8983\u8C3F\u8C49\u8C82\u8D6D\u8DE4\u8E1D\u8E1E\u8E7C\u8FE5\u8FE8\u9005\u9035\u9050\u9082\u9083\u9095\u90E2\u911E\u91AE\u91B4\u93D6\u9621\u968D\u96B9\u96D2\u9711\u9713\u973E\u9AB0\u9AB7\u9AE6\u9B03\u9B23\u9EDC\u9EEF", + "\u4E82\u4F48\u4F54\u50F9\u5167\u528D\u52DE\u532F\u537B\u53C3\u5433\u555F\u55AE\u56B4\u570D\u5716\u58D3\u58DE\u5920\u5967\u5A1B\u5BEB\u5BEC\u5C08\u5C0D\u5C46\u5C6C\u5CFD\u5E36\u5E6B\u5EC8\u5EF3\u5F48\u5F91\u5F9E\u5FB5\u6046\u60E1\u61F7\u6232\u6236\u64C7\u64CA\u64D4\u64DA\u64F4\u651D\u6578\u65B7\u6649\u6A13\u6A23\u6A6B\u6A94\u6AA2\u6B0A\u6B50\u6B61\u6B72\u6B77\u6B78\u6C92\u6EAB\u6EFF\u6FD5\u6FDF\u71DF\u722D\u72C0\u734E\u737B\u746A\u7522\u773E\u78BC\u7A69\u7C3D\u7CB5\u7D55\u7D72\u7DA0\u7DAB\u7DE3\u7E5E\u7E6A\u7E7C\u7E8C\u8072\u807D\u8085\u812B\u8166\u8173\u81D8\u8209\u820A\u8332\u838A\u840A\u85E5\u860B\u8655\u865B\u88DD\u89BA\u89BD\u89C0\u8AAA\u8B6F\u8B7D\u8B8A\u8B93\u8C50\u8CF4\u8E64\u8F15\u8F49\u8FA6\u8FAD\u9109\u9130\u91AB\u91CB\u92B7\u9304\u9322\u95CA\u96A8\u96AA\u96B1\u96B8\u96D6\u96D9\u96DC\u9748\u975C\u986F\u9918\u99DB\u9A57\u9B25\u9EA5\u9EC3\u9EDE\u9F52", + "\u514C\u51AA\u5614\u56AE\u56C2\u582F\u58FA\u5B0C\u5D11\u5DD2\u5DD6\u5E40\u5E5F\u5EEC\u6137\u6417\u6488\u64F2\u652A\u6582\u6689\u689F\u68D7\u69D3\u6A97\u6AB8\u6ABB\u6AC3\u6ADA\u6B7F\u6BB2\u6EA5\u6EC4\u6EF2\u7009\u701D\u7028\u703E\u7165\u71BE\u721B\u7463\u7464\u7469\u7515\u7526\u75FA\u7621\u779E\u79B1\u7A1F\u7AC4\u7AC7\u7B8F\u7BE9\u7D2E\u7D68\u7D8F\u7DB8\u7DBA\u7E46\u7E79\u7F4C\u7F88\u8070\u8073\u8076\u81BE\u82BB\u83A2\u858A\u8591\u861A\u8778\u87EC\u8805\u880D\u893B\u8A1B\u8A25\u8A36\u8A85\u8AA6\u8B17\u8B28\u8CB6\u8CE4\u8D16\u8D1B\u8ECB\u9112\u9214\u9249\u93AC\u9594\u9598\u95BB\u95D5\u965E\u96B4\u97DC\u9821\u9824\u9921\u9952\u9A55\u9A5B\u9B1A\u9C13\u9D09\u9DAF\u9E1A\u9E75\u9F67", + "\u4E9F\u4F6C\u4FDE\u4FFE\u5029\u5140\u51A2\u5345\u539D\u53FB\u54C7\u5599\u560E\u561B\u563B\u566C\u5676\u5729\u574D\u57E4\u595A\u598D\u5A1F\u5A25\u5A77\u5AB2\u5AD6\u5BF0\u5C2C\u5CEA\u5E37\u5F08\u6059\u606A\u6096\u609A\u62A8\u6555\u6556\u66E6\u675E\u68E3\u69BB\u6BCB\u6BD3\u6C1F\u6C26\u6C81\u6DC4\u6DDE\u6E32\u6E44\u6E4D\u6F33\u6F7C\u6FA7\u701A\u701B\u715C\u741B\u7428\u7480\u74A8\u7504\u752C\u768B\u76CE\u78CA\u78FA\u79BA\u7C27\u8046\u81FB\u8331\u8393\u83C1\u8403\u8438\u843C\u8446\u85B0\u87D2\u8862\u8DC6\u9074\u9131\u9672\u96EF\u9704\u9706\u977C\u9ABC\u9E92\u9ECF", + "\u51BD\u5704\u7350\u73A5", + "\u4E15\u4EA2\u4F5A\u50D6\u5349\u53DF\u5484\u5958\u5B34\u5B5A\u5C91\u5E1B\u5F77\u61CB\u61FF\u620C\u620D\u622E\u6248\u6538\u660A\u664F\u678B\u67E9\u69B7\u69C3\u6CB1\u6CD7\u6D5A\u6DAA\u6DC7\u7099\u71EE\u7325\u7425\u7455\u747E\u749E\u75B5\u7678\u7693\u76C2\u77B0\u77BF\u78CB\u7957\u795A\u797A\u7A79\u7B08\u7B75\u7BB4\u7F9A\u7FB2\u7FDF\u80E5\u81BA\u8340\u837C\u8398\u8559\u85A8\u86DF\u8734\u8882\u88F4\u8936\u900D\u907D\u9642\u96C9\u9AFB\u9E9D\u9EBE", + "\u5F57\u7940", + "\u5191\u7791\u792C\u7D46", + "\u5713\u58FD\u5D17\u5D19\u5DBC\u5F4C\u6191\u64A5\u687F\u69AE\u6AFB\u6EEC\u6F3F\u6FE4\u6FF1\u6FFE\u700B\u74CA\u76E1\u76E7\u7926\u792B\u79AE\u7AA9\u7C43\u7C4C\u7C64\u7DBD\u81A0\u856D\u8594\u8606\u8A62\u8AF7\u8CC8\u8CE3\u8D99\u8F1B\u8F3B\u9059\u9127\u9264\u947D\u95A9\u97CB\u980C\u9838\u9846\u99AE\u9A19\u9B06\u9B91\u9F4A\u9F4B", + "\u4E69\u4EC4\u4EDF\u4EF3\u4F0B\u4F5E\u5000\u5028\u50E5\u513B\u5157\u51DC\u52D7\u530F\u5379\u53F5\u5471\u5477\u5555\u555C\u557B\u5594\u55B2\u55C9\u560D\u5616\u562E\u5630\u5653\u5657\u566F\u56A8\u56B6\u5820\u5880\u58CE\u58D9\u5950\u5969\u596D\u599E\u59B3\u59CD\u59D2\u5A40\u5AA7\u5ABC\u5AD7\u5AD8\u5B0B\u5B24\u5B38\u5B53\u5C5C\u5D06\u5D47\u5D94\u5D9D\u5E57\u5EC4\u5F46\u5FAC\u60BD\u60D8\u6123\u615D\u615F\u6175\u618A\u61AB\u61E3\u623E\u6308\u636B\u645F\u6519\u6595\u6698\u66B8\u67D9\u6840\u695D\u696E\u6979\u69C1\u69E8\u6AEC\u6AFA\u6B5F\u6CAC\u6CE0\u6CEF\u6D0C\u6D36\u6DD2\u6DD9\u6DE6\u6DEC\u6E5F\u6FA0\u6FEC\u7156\u71C4\u71DC\u71EC\u71FC\u720D\u7230\u7292\u7296\u72A2\u72CE\u7357\u737A\u7380\u7386\u73A8\u73EE\u743F\u74A6\u74CF\u74D4\u74DA\u755A\u75A5\u75B3\u75C2\u75E0\u75F1\u75FF\u7601\u7609\u7646\u7658\u769A\u76B0\u774F\u775C\u778B\u77BD\u77C7\u7843\u787F\u78F4\u79C8\u7A88\u7A95\u7AFD\u7B1E\u7B67\u7B9D\u7BCC\u7C0D\u7C11\u7C37\u7C40\u7C6E\u7CB3\u7CBD\u7D09\u7D31\u7D40\u7D5B\u7D70\u7D91\u7D9E\u7DB0\u7DD9\u7DF9\u7E08\u7E11\u7E1D\u7E35\u7E52\u7FB6\u7FBF\u7FEE\u8012\u801C\u8028\u8052\u8123\u8188\u81C3\u81DA\u81FE\u8210\u82BE\u83A0\u83D4\u8407\u8435\u8477\u849E\u84C6\u84CA\u85F9\u867A\u86B5\u86B6\u86C4\u8706\u8707\u870A\u8768\u87BB\u8831\u8839\u8879\u8921\u8938\u8964\u89A6\u89AC\u8A10\u8A3E\u8AC2\u8ADB\u8AF3\u8B2B\u8B41\u8B4E\u8B5F\u8B6B\u8B92\u8C55\u8C62\u8C73\u8C8A\u8C8D\u8CB2\u8CB3\u8CD2\u8CE1\u8CFB\u8D0D\u8E34\u8E7A\u8E8A\u8ED4\u8EFE\u8F0A\u8F1C\u8F1E\u8F26\u8FAE\u9088\u90C3\u90FE\u9134\u9148\u91D9\u91E9\u9238\u9239\u923D\u924D\u925A\u9296\u92AC\u92BB\u9315\u9319\u931A\u9321\u9370\u9394\u93A2\u93D8\u93E4\u943A\u9477\u9582\u958E\u95A1\u95C8\u95CC\u95D4\u9658\u966C\u970F\u973D\u9744\u975B\u9766\u97A3\u97A6\u97C1\u97C6\u980A\u9837\u9853\u9870\u98AF\u98B3\u98BA\u98E9\u98ED\u9912\u991B\u991E\u993D\u993F\u99D1\u99DF\u9A01\u9A3E\u9A43\u9A4D\u9ACF\u9AE1\u9B22\u9B58\u9C25\u9C3E\u9C54\u9C56\u9D15\u9D23\u9D89\u9DC2\u9DD3\u9E82\u9E8B\u9EA9\u9EE0\u9EF7\u9F07\u9F2F\u9F34\u9F3E\u9F5F\u9F6C", + "\u5155\u520E\u55DF\u56C0\u56C1\u5793\u5FD6\u5FF8\u6029\u60FA\u613E\u6147\u615A\u62C8\u6384\u6883\u6894\u68F9\u6AA3\u6AAE\u6AC2\u6E63\u7032\u70A4\u7146\u71FB\u7228\u72F7\u7370\u7441\u74BF\u75B8\u75E3\u7622\u76CD\u7768\u79E3\u7A60\u7B6E\u7BC1\u7C5F\u7D06\u7E2F\u7E39\u8146\u81CF\u8703\u8729\u8737\u87EF\u88D2\u8A22\u8AC4\u8AF6\u8E59\u8F33\u8F42\u9169\u91B1\u9278\u93C3\u93DD\u9460\u946A\u9785\u9AD1\u9B4D\u9B4E\u9C31\u9D12\u9ECC", + "\u502A\u544E\u59AE\u59EC\u5D1B\u66A8\u6BD7\u6C76\u6E1D\u70EF\u742A\u7459\u7FE1\u82EF\u8343\u85C9\u8A79\u90DD", + "\u4EDE\u4F7B\u504C\u50EE\u52E3\u52F0\u536E\u54A9\u54BB\u54BF\u54C2\u54E6\u550F\u556A\u55E8\u564E\u5664\u5671\u568F\u56DD\u572F\u57A0\u5809\u5924\u59A3\u59A4\u59E3\u5A13\u5A23\u5B51\u5B73\u5C50\u5C8C\u6035\u60C6\u6106\u6215\u62CE\u62FD\u64ED\u6549\u6554\u655D\u659B\u65CE\u65D6\u6615\u6624\u665E\u6677\u669D\u66E9\u6772\u677C\u696B\u6A84\u6AA0\u6BFD\u6C16\u6C86\u6C94\u6CD6\u6D2E\u6D39\u6F78\u6FB6\u705E\u70CA\u7168\u723B\u7256\u7284\u73B3\u740D\u742F\u7498\u74A9\u752D\u75F3\u7634\u768E\u76B4\u76E5\u77A0\u77DC\u781F\u782D\u7AA0\u7BFE\u7FF1\u80AB\u8174\u81EC\u8202\u8222\u8228\u82DC\u8306\u83FD\u8469\u84FF\u859C\u8617\u86B1\u8722\u8C89\u8D67\u8DCE\u8E49\u8E76\u8E87\u8FE2\u8FE4\u8FF8\u9016\u905B\u9174\u982B\u98E7\u9955\u9B32", + "\u4F8F\u5055\u524C\u548E\u5583\u594E\u5CB7\u5ED6\u5F5D\u6021\u66B9\u66F0\u6C55\u6C7E\u6C82\u6E2D\u6EC7\u6ED5\u70B3\u71B9\u72C4\u73C0\u7426\u745C\u748B\u7696\u777F\u79A7\u79B9\u7F8C\u8153\u8339\u8386\u8725\u90B5\u9102\u962E\u9716\u97F6", + "\u5733\u57D4\u838E\u8FEA", + "\u50ED\u5F29\u62EE\u6A9C\u7BC6\u80F1\u8129\u8171\u822B\u8AEB", + "\u4EB3\u4F15\u4FB7\u5006\u509A\u50A2\u5102\u5109\u5115\u5137\u5138\u513C\u524B\u524E\u5277\u528A\u52E6\u52FB\u5331\u5436\u5443\u54FD\u5538\u555E\u55C6\u55C7\u5679\u5690\u5695\u56C9\u56D1\u56EA\u588A\u58E2\u5AFB\u5B2A\u5B43\u5B7F\u5BE2\u5C37\u5D27\u5D84\u5D87\u5DD4\u5EC1\u5EDD\u5F12\u5FA0\u60F1\u616B\u61F5\u61F6\u61FE\u62DA\u6371\u6399\u63C0\u6451\u647B\u6493\u64BB\u64BF\u64C4\u64F1\u64F7\u650F\u652C\u665D\u6684\u6688\u66EC\u672E\u68E7\u69A6\u69ED\u69F3\u6A01\u6AAF\u6AE5\u6BA4\u6BAE\u6BAF\u6BC6\u6C08\u6C2C\u6C59\u6D87\u6EBC\u6ECC\u6EF7\u6F6F\u6F80\u6F86\u6FD8\u6FF0\u6FFA\u7006\u7018\u7030\u7051\u7192\u71C9\u71D9\u71F4\u71FE\u7274\u7377\u74A3\u750C\u7613\u7627\u7661\u7662\u7665\u766E\u7671\u7672\u76BA\u775E\u776A\u778C\u78E7\u7955\u7A08\u7AC5\u7B4D\u7C2B\u7C6C\u7CF0\u7D02\u7D1C\u7D73\u7DA2\u7DB5\u7DDE\u7E09\u7E0A\u7E37\u7E43\u7E61\u7E7D\u7E93\u7F3D\u7FF9\u81A9\u8271\u83F8\u84C0\u8514\u85BA\u86A9\u86FB\u879E\u8814\u8836\u889E\u8932\u896A\u896F\u8993\u89B2\u8A15\u8A16\u8A1D\u8A5B\u8A6C\u8A6D\u8A7C\u8AA1\u8AA3\u8AA5\u8B0A\u8B4F\u8B59\u8B96\u8C48\u8C54\u8CBD\u8CFA\u8D13\u8E89\u8E8B\u8EAA\u8EC0\u8EDB\u8EFC\u8F12\u8F1F\u8F3E\u8F45\u8FFA\u9015\u9183\u919E\u91A3\u91D7\u91F5\u9209\u9215\u923E\u9240\u9251\u9257\u927B\u9293\u92A8\u92C5\u92C7\u92F0\u9333\u935A\u9382\u938A\u9398\u93B3\u93D7\u93DF\u93E2\u93FD\u942B\u942E\u9433\u9463\u9470\u9472\u947E\u95D0\u96CB\u97C3\u97CC\u981C\u9839\u986B\u98B6\u98EA\u9909\u991A\u9935\u993E\u9951\u99A5\u99B1\u99D9\u99DD\u99F1\u9A2B\u9A62\u9A65\u9AAF\u9AD2\u9AEF\u9B0D\u9B28\u9B77\u9BFD\u9C49\u9C5F\u9C78\u9D3F\u9D72\u9DD7\u9E1B\u9EB4\u9EF4\u9F66\u9F94", + "\u5DBD\u63C6\u6E3E\u7587\u8AF1\u8B5A\u9695", + "\u53A5\u589F\u5CD9\u7109\u7F79\u8006\u8654\u8944\u968B\u96CD", + "\u4F47\u4F91\u4FCE\u4FDF\u527D\u535E\u55DA\u56A5\u5879\u5A11\u5B7A\u5CAB\u5CF4\u5EBE\u5F7F\u5FA8\u601B\u606B\u60B8\u610D\u6134\u619A\u61FA\u6369\u6523\u65CC\u66C4\u6727\u6968\u6A05\u6A48\u6B59\u6BEC\u6D35\u6D38\u6E19\u701F\u7064\u711C\u716C\u71A8\u71E7\u7258\u743A\u746F\u75BD\u75D9\u75F2\u7669\u766C\u76DE\u7729\u77BC\u78EC\u792A\u7A37\u7A62\u7BE6\u7C2A\u7C50\u7D07\u7DD8\u7E5A\u7F8B\u7FD5\u7FF3\u8151\u81CD\u8317\u83F4\u85EA\u85FA\u8823\u895E\u89F4\u8A0C\u8A41\u8AA8\u8ACD\u8B10\u8CC1\u8D05\u8D73\u8E4A\u8E85\u8E91\u8EFB\u8F13\u9087\u914A\u91C9\u923F\u93B0\u9403\u95A8\u95AD\u9730\u9865\u9903\u9945\u9949\u99AD\u99E2\u9A6A\u9D26\u9E1E\u9EDD\u9F2C\u9F72", + "\u4E9E\u4F86\u5011\u50B3\u5152\u5169\u5340\u5718\u5B78\u5BE6\u5BF6\u5C07\u5EE3\u61C9\u6230\u6703\u689D\u6A02\u6C23\u7063\u7368\u756B\u7576\u767C\u7A31\u7D93\u7E23\u7E3D\u81FA\u8207\u842C\u85DD\u865F\u8B49\u8B80\u8CFD\u908A\u9435\u95DC\u965D\u9AD4\u9EE8", + "\u5480\u5580\u5C39\u67EF\u68B5\u6D85\u8521\u90B1", + "\u4E1F\u4F96\u4FE0\u50F1\u5118\u522A\u5291\u52C1\u52DB\u52F3\u52F5\u52F8\u53B2\u55CE\u562F\u580A\u5862\u58AE\u58D8\u58DF\u58E9\u58EF\u5925\u593E\u599D\u5ABD\u5C62\u5EC2\u5EDA\u5EE2\u5F4E\u5F65\u6085\u6158\u61FC\u6200\u62CB\u633E\u6416\u6436\u6490\u64CB\u64E0\u64FA\u6514\u651C\u6524\u6558\u6583\u66B1\u66C6\u66C9\u66E0\u6A11\u6A1E\u6A38\u6A62\u6AB3\u6B16\u6B98\u6BBC\u6C2B\u6DDA\u6DE8\u6DEA\u6DFA\u6EEF\u6EFE\u6F32\u6F51\u6F5B\u700F\u71D2\u7210\u7246\u7260\u72A7\u72F9\u7375\u7378\u758A\u760B\u76DC\u76EA\u77DA\u77FD\u78DA\u7919\u797F\u79AA\u7A05\u7A4C\u7ACA\u7C72\u7D81\u7DDD\u7E31\u7E69\u7E6B\u7E73\u7E96\u7E9C\u81BD\u81C9\u81DF\u8259\u8277\u8396\u83A7\u8523\u8525\u860A\u863F\u8667\u87A2\u87F2\u881F\u883B\u89F8\u8B20\u8B74\u8B9A\u8C4E\u8C6C\u8C93\u8CEC\u8D0A\u8D0F\u8D95\u8E10\u8F4E\u8FAF\u8FF4\u905E\u9072\u9081\u908F\u91AC\u91C0\u91C1\u91D0\u921E\u9223\u9245\u929C\u92B3\u92C1\u9336\u934A\u93C8\u9444\u9452\u947C\u947F\u9592\u95B1\u95C6\u95D6\u95E1\u95E2\u96DE\u9742\u978F\u984F\u9871\u98B1\u98C4\u99ED\u9A37\u9A45\u9A5F\u9AEE\u9B27\u9BCA\u9C77\u9D51\u9D5D\u9E79\u9E7C\u9E7D\u9EB5\u9EBC\u9F61\u9F63\u9F90\u9F9C", + "\u5283\u7562\u7DEC\u88E1\u8F2F", + "\u5009\u502B\u5049\u5075\u507D\u5091\u5098\u50B5\u50B7\u50BE\u5100\u5104\u511F\u518A\u525B\u5289\u5442\u5805\u589C\u58C7\u5922\u596A\u5A66\u5B6B\u5BE7\u5BE9\u5DBA\u5E63\u5E7E\u5FB9\u6163\u616E\u6176\u61B2\u61B6\u61F8\u639B\u63DA\u63EE\u640D\u64B2\u64C1\u64EC\u6557\u6575\u6607\u66AB\u68C4\u6A39\u6C96\u6CC1\u6E1B\u6E6F\u6E9D\u6EC5\u6F01\u6F64\u6FC3\u7058\u707D\u7344\u7642\u76E4\u7832\u790E\u7B46\u7D05\u7D0B\u7D14\u7D19\u7D1B\u7D39\u7D61\u7DB1\u7DCA\u7DD2\u7DE0\u7DE9\u7DEF\u7DF4\u7E2E\u7E3E\u8105\u8108\u81E8\u8266\u84CB\u84EE\u85A9\u885D\u88DC\u8972\u8A02\u8A0E\u8A13\u8A17\u8A2A\u8A34\u8A3A\u8A3C\u8A69\u8A73\u8A95\u8AA0\u8AA4\u8AB2\u8AC7\u8ACB\u8B00\u8B1B\u8B1D\u8B5C\u8C9D\u8C9E\u8CA2\u8CA8\u8CA9\u8CAB\u8CAC\u8CB7\u8CBF\u8CC0\u8CDE\u8CE2\u8CFC\u8D08\u8DE1\u8E8D\u8ECC\u8EDF\u8EF8\u8F14\u8F1D\u8F2A\u8F44\u9055\u9069\u9077\u907C\u90F5\u91DD\u9285\u92FC\u9326\u932F\u9375\u9396\u93AE\u93E1\u9451\u9589\u95A3\u9663\u9670\u9673\u96BB\u9801\u9802\u9803\u9806\u9808\u9810\u983B\u984D\u9858\u9867\u98EF\u98F2\u98FE\u990A\u99D0\u9A0E\u9A5A\u9B5A\u9CE5\u9DB4\u9E97\u9F8D", + "\u543E\u5BEE\u5F18\u6590\u725F\u83C5\u85E9\u9E93", + "\u5016\u53AD\u5606\u5629\u58BE\u5F14\u6065\u6144\u646F\u647A\u67F5\u6953\u6C3E\u6F2C\u6F97\u6FB1\u7169\u71E6\u71ED\u74BD\u79BF\u7A1C\u7A4E\u7AAF\u7CDE\u7D17\u7D43\u7E55\u7FA8\u807E\u8139\u8490\u8569\u856A\u87FB\u8A23\u8AB9\u8AE6\u8AFA\u8B2C\u8CD1\u91D8\u92F8\u9318\u96DB\u99B4\u9BC9\u9C2D\u9CF6\u9D61\u9DFA", + "\u4E26\u4F75\u4FC2\u500B\u5074\u5099\u512A\u5225\u5247\u5275\u5287\u52D5\u52D9\u52DD\u52E2\u5354\u54E1\u554F\u5712\u57F7\u5831\u5834\u5BAE\u5C0E\u5C64\u5CA1\u5CF6\u5E2B\u5E79\u5EAB\u5F35\u5F37\u5F8C\u5FA9\u611B\u614B\u63A1\u63DB\u6642\u66F8\u6771\u696D\u6975\u69CB\u6A19\u6A4B\u6A5F\u6BBA\u6C7A\u6E2C\u6E96\u6F22\u70BA\u7121\u71B1\u7372\u73FE\u74B0\u7570\u76E3\u78BA\u7A2E\u7A4D\u7AF6\u7BC0\u7BC4\u7BC9\u7C21\u7D00\u7D04\u7D0D\u7D1A\u7D30\u7D42\u7D44\u7D50\u7D66\u7D71\u7DAD\u7DDA\u7DE8\u7E54\u7F85\u7FA9\u7FD2\u8056\u805E\u8077\u8208\u83EF\u8449\u8853\u885B\u88FD\u8907\u898B\u898F\u8996\u89AA\u8A08\u8A18\u8A2D\u8A31\u8A55\u8A5E\u8A66\u8A71\u8A72\u8A8C\u8A8D\u8A9E\u8ABF\u8AD6\u8AF8\u8B58\u8B70\u8B77\u8CA0\u8CA1\u8CB4\u8CBB\u8CC7\u8CEA\u8ECA\u8ECD\u8F03\u8F09\u8F38\u8FB2\u9023\u9031\u9032\u904A\u904B\u904E\u9054\u9060\u9078\u907A\u9084\u9280\u9577\u9580\u958B\u9593\u9678\u967D\u968A\u968E\u969B\u96E2\u96E3\u96F2\u96FB\u97D3\u97FF\u9805\u9818\u982D\u984C\u985E\u98A8\u98DB\u9928\u99AC\u9BAE", + "\u5F6B\u6C4E\u7B87\u8A70", + "\u540B\u5B5C\u826E", + "\u4F83\u4FF8\u51CB\u52BE\u53F1\u548B\u558B\u5CB1\u5D69\u5F3C\u620E\u621F\u64E2\u67DA\u6854\u69CC\u6A35\u6C8C\u6E1A\u6F15\u6FE0\u717D\u7252\u7AFA\u82D3\u83DF\u8431\u9041\u9149\u9798", + "\u4ED5\u55E3\u572D\u57A3\u587E\u5983\u5A9B\u5C90\u5E61\u672D\u6960\u6F5F\u72D9\u72E9\u757F\u7949\u7950\u7E82\u7FCC\u82B8\u90B8\u91DC\u961C\u9B45", + "\u55AB\u6249\u643E\u6841\u68B1\u725D\u7B8B\u7C95\u7E1E\u7F36\u8A03\u8A6B\u8E74\u95A4", + "\u50AD\u50D1\u5132\u51F1\u55AC\u5617\u5687\u584A\u59EA\u5B30\u5BF5\u5C0B\u5C4D\u5EDF\u6182\u61A4\u64AB\u64FE\u66A2\u6897\u694A\u69CD\u6B3D\u6BC0\u6D29\u6F38\u7015\u7149\u71C8\u723A\u7336\u7345\u755D\u76C3\u78A9\u798D\u7AAE\u7DFB\u7E2B\u7F75\u7F77\u81E5\u834A\u852D\u85CD\u8755\u8A3B\u8A54\u8AE7\u8B02\u8B39\u8CAA\u8CE6\u8DA8\u8E5F\u8F5F\u905C\u912D\u919C\u92D2\u932B\u937E\u9418\u9583\u9812\u985B\u9905\u99B3\u99C1\u99D5\u9A30\u9CF3\u9D3B\u9D6C", + "\u6D6C\u72FD\u77A5\u8956\u9C0D", + "\u5919\u5F4A\u6063\u63AC\u649A\u6715\u6AD3\u71D0\u758B\u834F\u85F7\u88DF\u8F61\u93D1\u98F4\u9D60", + "\u4F50\u7DB2\u962A", + "\u5E96\u75D4\u91C6", + "\u5E9A\u6C40\u821C\u839E\u8FED\u9EDB", + "\u5F01\u66DC", + "\u5023\u5208\u531D\u536F\u53E9\u54C9\u598A\u59BE\u5A20\u5D6F\u5DF3\u66C7\u66D6\u66F3\u6775\u6A3D\u6ADB\u6B86\u6C72\u6E25\u73EA\u7435\u760D\u7656\u7825\u78D0\u7A14\u7A6B\u7B20\u7BE0\u7CF8\u7DAC\u7DBB\u7DBE\u80E4\u80F4\u837B\u8466\u8568\u867B\u8A63\u91E7\u9320\u935B\u9591\u965B\u98E2\u990C\u9913\u9BAB", + "\u60B6\u8AD2\u8CC2\u9237\u9328\u934D\u9397\u9830", + "\u4FB6\u50D5\u51CD\u559A\u55AA\u5674\u5857\u585A\u5875\u58B3\u596E\u59E6\u5A41\u5D50\u5E25\u5E33\u5F59\u61C7\u61F2\u6368\u6383\u65AC\u68DF\u68F2\u6A3A\u6B04\u6DBC\u6DF5\u6E26\u6E4A\u6E67\u6F54\u6F70\u6FC1\u6FEB\u7159\u727D\u7652\u77EF\u78EF\u798E\u7A40\u7AAA\u7BE4\u7C60\u7CE7\u7CFE\u7D21\u7D33\u7D5E\u7D79\u7DB4\u7DBF\u7E1B\u7E8F\u7F70\u814E\u816B\u8178\u819A\u84BC\u85A6\u865C\u8766\u8A1F\u8A50\u8A60\u8A6E\u8A87\u8A98\u8AB0\u8ADC\u8AED\u8AEE\u8B0E\u8B19\u8CA7\u8CAF\u8CB8\u8CBC\u8CC3\u8CC4\u8CCA\u8CDC\u8CE0\u8CED\u8ED2\u8F29\u8F3F\u91E3\u920D\u9234\u925B\u9298\u9310\u934B\u958F\u95A5\u9727\u97FB\u9811\u984E\u98FC\u98FD\u99D2\u99FF\u9B31\u9BE8\u9C57\u9CE9\u9CF4\u9D28\u9DF9", + "\u4E1E\u502D\u51A5\u5321\u58EC\u5A3C\u5BC5\u5CE8\u61A9\u620A\u65A1\u6714\u6853\u6893\u6C50\u6C5D\u7436\u745A\u745B\u773A\u7941\u7947\u8543\u865E\u8C5A\u914B\u99A8\u9AB8", + "\u4E99\u5BA5\u5DFD\u608C\u60C7\u60DA\u6190\u61A7\u6753\u6777\u6787\u6B4E\u6F23\u6FE1\u6FEF\u7337\u7827\u786F\u7893\u7ABA\u7B94\u7BB8\u7C3E\u7D62\u7E6D\u80B1\u81BF\u81C6\u821B\u82E7\u83F0\u84D1\u86ED\u8888\u8B01\u8B04\u8F4D\u9291\u92E4\u932E\u9354\u936C\u939A\u9957\u9AED\u9BAA\u9BAD\u9BD6\u9BDB\u9C3B\u9D1B", + "\u50C5\u53E2\u5EE0\u65BC\u70CF\u723E\u7D10\u7D9C\u806F\u8607\u862D\u8A0A\u8AFE\u8CD3\u9019\u9813\u9B6F", + "\u4EA8\u4F3D\u5384\u5EFF\u60DF\u66DD\u6E5B\u8087\u82D1\u8FE6\u9640\u9E9F", + "\u5147\u525D\u5678\u617E\u6372\u79A6\u8ABC\u92EA\u9438\u9817", + "\u6D3C\u718F\u74EE\u8712", + "\u4F84\u54C6\u5565\u68F1\u6D82\u83C7", + "\u4FE9\u4FED\u51FF\u523D\u5300\u5364\u538C\u5450\u5455\u545C\u54D1\u54D7\u5578\u56A3\u58F6\u592F\u5CE6\u5D2D\u5E90\u6073\u607C\u60EB\u61D2\u62E2\u62E3\u631A\u6320\u6323\u6361\u63B7\u63B8\u63BA\u6405\u65A9\u65F7\u6619\u6655\u67A3\u67E0\u6805\u6808\u6866\u6868\u6869\u6A71\u6BE1\u6C79\u6CA5\u6CDE\u6DA4\u6DA7\u6DA9\u6E85\u70DB\u70E6\u70EB\u7115\u724D\u7410\u759F\u75AE\u75EA\u75F9\u762B\u763E\u76B1\u77EB\u783E\u79C3\u7A8D\u7A9C\u7B5D\u7BF1\u7EC5\u7ED2\u7EDE\u7EE3\u7EF7\u7EF8\u7EFD\u7F00\u7F0E\u7F15\u7F1A\u7F20\u7F24\u7F28\u7FA1\u7FD8\u8038\u803B\u804B\u80AE\u817B\u82C7\u8327\u835E\u8367\u83BA\u8424\u864F\u8681\u8682\u8715\u8717\u8721\u8747\u874E\u8845\u886C\u889C\u88E4\u89C5\u8BB6\u8BB9\u8BC0\u8BC5\u8BE1\u8BEB\u8BEC\u8BF5\u8C0E\u8C1A\u8D2E\u8D31\u8D43\u8D4E\u8D58\u8F67\u8F7F\u9489\u9499\u949D\u94A0\u94A5\u94AE\u94BE\u94D0\u94DB\u94F2\u9508\u950C\u951A\u9525\u952D\u952F\u9530\u953B\u9540\u9550\u9570\u9576\u95F0\u960E\u9668\u96CF\u97E7\u9885\u988A\u98A4\u9965\u9975\u997A\u997F\u9985\u998D\u998F\u9A6E\u9A6F\u9A74\u9A79\u9A7C\u9A82\u9A87\u9CA4\u9CC4\u9CCD\u9CD6\u9E20\u9E25\u9E35\u9E3D\u9E45\u9E49\u9E4A\u9E66", + "\u576F\u579B\u6345\u78B4\u79EB\u79F8", + "\u4E13\u4E1A\u4E1C\u4E24\u4E25\u4E2A\u4E3E\u4E49\u4E50\u4E66\u4E9A\u4EA7\u4EBF\u4ECE\u4EEC\u4EF7\u4F17\u4F20\u5170\u5173\u519B\u51B3\u51E4\u51FB\u5219\u521B\u522B\u529E\u52A1\u52A8\u52BF\u534F\u5355\u536B\u5386\u53BF\u53D1\u53D8\u542F\u5458\u54CD\u56E2\u56ED\u56F4\u56FE\u573A\u5904\u590D\u5934\u5B81\u5B9E\u5BF9\u5BFC\u5C14\u5C9B\u5E26\u5E7F\u5E94\u5F00\u5F20\u5F3A\u603B\u6218\u65E0\u65F6\u663E\u672F\u6743\u6784\u6807\u6C14\u6C49\u707E\u70ED\u73AF\u73B0\u7535\u76D1\u786E\u79CD\u79EF\u7B80\u7C7B\u7EA2\u7EA6\u7EA7\u7EAA\u7EBF\u7EC4\u7EC7\u7ED3\u7EDF\u7EE7\u7EED\u7EF4\u7F16\u7F57\u804C\u8054\u817E\u8282\u82CF\u83B7\u8425\u89C1\u89C2\u89C4\u89C6\u8BA1\u8BA4\u8BAE\u8BAF\u8BB0\u8BB8\u8BBA\u8BBE\u8BC1\u8BC4\u8BD1\u8BDD\u8BE5\u8BED\u8BF4\u8C03\u8D22\u8D23\u8D28\u8D39\u8D44\u8D5B\u8F66\u8F6C\u8F83\u8FBE\u8FC7\u8FD0\u8FD8\u8FD9\u8FDB\u8FDE\u9009\u94C1\u957F\u95E8\u95EE\u95F4\u95FB\u961F\u9633\u9645\u9646\u96BE\u9879\u9884\u9886\u9898\u98CE\u9A6C\u9F99", + "\u51DB\u67B7", + "\u4FA5\u545B\u5499\u5520\u5570\u56F1\u5A76\u5C96\u60AF\u60ED\u618B\u61A8\u62A0\u62A1\u62E7\u6363\u6390\u63B0\u6400\u6402\u6512\u6748\u70C1\u732C\u765E\u7663\u76CF\u7741\u781A\u7980\u79C6\u79FD\u7AA5\u7B0B\u7B8D\u7BA9\u7BAB\u7BD3\u7CAA\u7EAB\u7ECA\u7EE2\u7F2D\u7F30\u8110\u8113\u81CA\u835A\u8360\u84D6\u852B\u87E5\u8869\u8A8A\u8BA5\u8BF2\u8C05\u8C12\u8D30\u8D4A\u8D61\u8DF7\u8E6D\u8E8F\u8F95\u8F99\u8FAB\u94B3\u94C6\u94E3\u9504\u954A\u9563\u95FA\u9893\u9981\u9992\u9AA1\u9CAB\u9E2F\u9E33\u9EB8", + "\u4E22\u4E8F\u4F1E\u4FA3\u5151\u517D\u51BB\u51D1\u5220\u529D\u52CB\u5367\u5389\u5395\u53E0\u53F9\u5413\u548F\u5524\u575E\u575F\u5784\u5792\u57A6\u57AB\u58F3\u5986\u5988\u5A04\u5A07\u5BA0\u5C18\u5C82\u5DE9\u5E10\u5E1C\u5F2F\u60E9\u6124\u629B\u6321\u6324\u635E\u63FD\u6401\u644A\u6491\u655B\u658B\u6635\u67AB\u67DC\u680B\u692D\u6984\u6A31\u6B7C\u6BD9\u6C22\u6CA6\u6CA7\u6CEA\u6CFB\u6CFC\u6D46\u6D47\u6D4A\u6D51\u6DA1\u6E0A\u6E83\u6EE4\u6EE5\u6F9C\u6FD2\u70C2\u7237\u727A\u730E\u7574\u75AF\u7792\u7816\u7845\u78B1\u7A77\u7A91\u7A9D\u7AD6\u7B3C\u7B5B\u7CAE\u7EA4\u7EB1\u7EBA\u7ECE\u7ED1\u7EF0\u7EF3\u7F14\u7F1D\u7F34\u7F62\u8042\u806A\u80A0\u80A4\u80BE\u80BF\u80C0\u810F\u8138\u8231\u8270\u829C\u82CD\u8350\u83B9\u841D\u8574\u8680\u8BB3\u8BBC\u8BBD\u8BC8\u8BF1\u8BFD\u8C0A\u8C0D\u8C1C\u8C24\u8C26\u8C2C\u8C2D\u8C34\u8D1E\u8D2C\u8D3C\u8D41\u8D42\u8D4C\u8D50\u8D5A\u8F69\u8F88\u8F90\u8FA9\u915D\u9171\u9493\u949E\u94A7\u94A9\u94BB\u94C3\u94C5\u94DD\u94F8\u9505\u9510\u9523\u9524\u95EF\u95F7\u95F9\u9600\u9610\u96F3\u97F5\u987D\u9882\u9888\u9896\u98D8\u9971\u9972\u9976\u997C\u9A84\u9A86\u9A8F\u9A97\u9A9A\u9AA4\u9CB8\u9CDE\u9E26\u9E43\u9E64\u9E70\u9F7F\u9F9F", + "\u534E\u62A5\u7ECF\u7F51", + "\u4E34\u4E3D\u4E4C\u4E54\u4E60\u4E61\u4E70\u4EB2\u4EC5\u4EEA\u4F18\u4F1F\u4F24\u4F26\u4FA7\u50A8\u513F\u5174\u517B\u518C\u519C\u51B5\u51CF\u5218\u521A\u5267\u52B3\u5356\u5382\u5385\u538B\u53A6\u5434\u5706\u5723\u5757\u575A\u575B\u575D\u5907\u591F\u593A\u5956\u5B59\u5BA1\u5BAB\u5BBD\u5BBE\u5BFB\u5C42\u5C81\u5E01\u5E08\u5E86\u5E93\u5F02\u5F39\u5F52\u5F55\u5F7B\u6000\u6001\u6076\u620F\u6237\u6267\u6269\u626C\u62A2\u62A4\u62DF\u62E5\u62E9\u6325\u635F\u6362\u6444\u6653\u6682\u6740\u6742\u6768\u6781\u6811\u6837\u6865\u68C0\u6B22\u6BC1\u6BD5\u6C47\u6C9F\u6CAA\u6CFD\u6D4B\u6DA8\u6E10\u6EE1\u6EE8\u706D\u7075\u70DF\u7231\u739B\u7597\u76D6\u76D8\u77FF\u7801\u7840\u79BB\u7A33\u7ADE\u7B14\u7B7E\u7CA4\u7D27\u7EB3\u7EBD\u7EC3\u7EC6\u7EC8\u7ECD\u7ED5\u7ED9\u7EDC\u7EDD\u7EE9\u7EFC\u7EFF\u7F13\u7F29\u8083\u80DC\u8111\u814A\u8230\u827A\u8363\u836F\u8428\u84DD\u867D\u8865\u88AD\u89C8\u8BA2\u8BA8\u8BA9\u8BAD\u8BB2\u8BBF\u8BC6\u8BCD\u8BD5\u8BEF\u8BF7\u8BF8\u8BFA\u8BFB\u8C08\u8D1D\u8D1F\u8D21\u8D25\u8D27\u8D2D\u8D2F\u8D35\u8D38\u8DC3\u8F6E\u8F6F\u8F7B\u8F7D\u8F86\u8F91\u8F93\u8F96\u8FB9\u8FBD\u8FC1\u8FDC\u8FDD\u9002\u9057\u90BB\u90D1\u91CA\u9488\u949F\u94A2\u94B1\u94F6\u9500\u9526\u9547\u9614\u9634\u9635\u9636\u9648\u9655\u9669\u9690\u97E9\u9875\u9876\u987A\u987B\u987E\u987F\u9891\u989D\u98DE\u9986\u9A7B\u9A8C\u9C81\u9C9C\u9F50", + "\u4E1B\u4E1D\u4E27\u4EA9\u4ED1\u4ED3\u4F2A\u4FA6\u4FA8\u503A\u503E\u507F\u5188\u51AF\u51C0\u51C9\u51ED\u51EF\u5242\u5251\u52B2\u5362\u53A2\u5415\u5417\u5428\u55B7\u5760\u5899\u5939\u594B\u5987\u5A31\u5A74\u5BAA\u5C1D\u5C7F\u5C97\u5CAD\u5E05\u5E2E\u5E99\u5E9E\u5E9F\u5F03\u5FC6\u5FE7\u60AC\u60CA\u60EF\u626B\u6270\u629A\u62E6\u62E8\u6446\u6447\u654C\u67AA\u680F\u6863\u68A6\u6C64\u6D01\u6D53\u6D9D\u6DA6\u6E14\u6E17\u6EDA\u6EE9\u707F\u70BC\u70E7\u7275\u72B9\u72EE\u72F1\u743C\u7545\u76D0\u7855\u7978\u7B79\u7BEE\u7EA0\u7EAC\u7EAF\u7EB2\u7EB5\u7EB7\u7EB8\u7EB9\u7ED8\u7EEA\u7EF5\u7F05\u7F06\u7F18\u7F5A\u80C1\u80F6\u8109\u8206\u8273\u82F9\u8346\u8361\u83B2\u8427\u8651\u867E\u8854\u89C9\u8BC9\u8BCA\u8BD7\u8BDA\u8BDE\u8BE2\u8BE6\u8BFE\u8C01\u8C0B\u8C10\u8C13\u8C22\u8C23\u8C28\u8C31\u8D24\u8D26\u8D29\u8D2A\u8D2B\u8D34\u8D37\u8D3A\u8D3E\u8D3F\u8D4B\u8D4F\u8D54\u8D56\u8D5E\u8D60\u8D62\u8D75\u8D76\u8D8B\u8F68\u8F70\u8F74\u8F85\u8F89\u8FC8\u8FDF\u900A\u9012\u903B\u9093\u90AE\u917F\u9274\u94A6\u94DC\u94ED\u94FA\u94FE\u9501\u950B\u9519\u9521\u952E\u955C\u95EA\u95ED\u95F2\u95F8\u95FD\u9601\u9605\u9647\u96B6\u96FE\u9877\u9881\u9887\u9897\u989C\u98A0\u996D\u996E\u9970\u9A70\u9A71\u9A73\u9A76\u9A7E\u9A91\u9C7C\u9E1F\u9E21\u9E23\u9E2D\u9E3F\u9E4F\u9F84", + "\u5239\u8EAF", + "\u51C4\u8471", + "\u6DC0\u7C98", + "\u5631\u5815\u8695", + "\u4E71\u4FA0\u5265\u52B1\u5374\u53A8\u53D9\u58EE\u5BDD\u5BFF\u5C3D\u5C4A\u5CE1\u5F25\u5F84\u604B\u60A6\u60E7\u60E8\u631F\u636E\u643A\u663C\u664B\u67A2\u6816\u697C\u6B8B\u6BB4\u6D45\u6E7F\u6EDE\u6F5C\u706F\u7089\u72ED\u732A\u732B\u76D7\u793C\u7977\u7A0E\u7A83\u80C6\u811A\u8131\u82A6\u830E\u848B\u865A\u866B\u86EE\u89E6\u8A89\u8DF5\u8E0A\u8E2A\u8F9E\u9065\u968F\u9759\u9EA6", + "\u601C\u75D2", + "\u4E07\u4E0E\u4E89\u4F1A\u4F53\u515A\u5185\u5199\u533A\u533B\u53C2\u53CC\u53F7\u58F0\u5965\u5B66\u5B9D\u5C06\u5C5E\u5F53\u62C5\u6570\u65AD\u65E7\u6761\u6765\u6A2A\u6B27\u6CA1\u6E29\u6E7E\u70B9\u72B6\u72EC\u732E\u753B\u79F0\u88C5\u9EC4", + "\u693F\u82EB", + "\u53F6\u6D9B\u83B1", + "\u5C61\u788D", + "\u4E10\u4E52\u4EC6\u4F88\u4FD0\u51F3\u533E\u53ED\u53EE\u5406\u541D\u5429\u5435\u5440\u5490\u5495\u54B1\u54C4\u54FC\u557C\u55D3\u5669\u56E4\u5777\u5992\u59E8\u5B7D\u5BDE\u5BE5\u5C79\u5C94\u5DCD\u5E18\u5E1A\u5E54\u5FF1\u604D\u6064\u60F6\u6127\u6177\u6233\u6252\u625B\u6273\u6296\u62C2\u62C7\u62F4\u638F\u6396\u63E3\u63EA\u6413\u6479\u64A9\u64C2\u659F\u667E\u6760\u6845\u6963\u6A90\u6B83\u6C13\u6C5E\u6D8E\u6D95\u6DCC\u6ED4\u6F13\u6F3E\u6FA1\u7076\u70D8\u710A\u71CE\u7239\u72E1\u73B7\u7599\u759A\u75A4\u75CA\u7629\u7682\u76C5\u76EF\u778E\u77AA\u787C\u7889\u788C\u78BE\u79E7\u7A96\u7A98\u7B77\u7C7D\u7CB1\u7D0A\u7D6E\u7F94\u7FCE\u8116\u814B\u814C\u819B\u828D\u82DF\u8301\u83E0\u85D5\u8611\u86A3\u8708\u8822\u8C4C\u8DB4\u8DEA\u8E42\u8E66\u8E72\u8EBA\u901B\u9157\u970E\u97ED", + "\u62FC\u88D4\u9B4F", + "\u4ED7\u4F63\u4FCF\u5018\u50BB\u50F5\u5154\u5201\u522E\u5254\u527F\u5306\u5462\u5492\u5496\u54A8\u54AA\u554A\u5561\u5564\u5566\u5885\u5938\u5AC2\u5AE9\u5CED\u5F64\u6084\u608D\u60A8\u60D5\u61C2\u61C8\u6254\u626F\u62AC\u6346\u634D\u640F\u6454\u6487\u6495\u64D2\u6746\u6789\u68B3\u68F5\u695E\u6986\u6995\u69A8\u6A44\u6AAC\u6B79\u6C28\u6C2E\u6CF5\u6DE4\u6E34\u6E3A\u6E89\u6F29\u70AB\u70AC\u7130\u715E\u7184\u71AC\u7238\u7281\u72E0\u74E3\u74F7\u7529\u7578\u761F\u7626\u76D4\u775B\u7779\u7784\u77BB\u780C\u780D\u7838\u7898\u78C5\u78F7\u7AED\u7B28\u7BE1\u7C07\u7CD5\u7CD9\u7CEF\u7F38\u800D\u8084\u809A\u8165\u816E\u832B\u8334\u840D\u8774\u886B\u888D\u88D9\u88F9\u8C41\u8D81\u8D9F\u8E22\u8E29\u8EB2\u8F9C\u9165\u918B\u9631\u964B\u964C\u9661\u9709\u9739\u9776\u9AD3\u9ED4", + "\u4E53\u5582\u5600\u6342\u7B06", + "\u5288\u543C\u5475\u5486\u54EE\u5598\u56BC\u5962\u5A36\u5A9A\u5B75\u5BA6\u5C38\u5C4E\u5F8A\u5F98\u627C\u62CC\u62D7\u63C9\u6930\u6954\u69D0\u6BEF\u6C90\u6CBD\u6CBE\u6F31\u6F88\u70D9\u7329\u75BC\u75F0\u7737\u77D7\u7B19\u7FB9\u803F\u80D6\u813E\u81C0\u8205\u8309\u83BD\u846B\u8517\u868C\u8759\u8815\u8859\u8B6C\u8E81\u8EAC\u90A2\u9698\u9B44", + "\u722C\u7FD4", + "\u5228\u5315\u542E\u54CE\u5509\u5527\u5543\u55B3\u55E1\u5636\u568E\u5FFF\u61E6\u6376\u642A\u6726\u74E4\u76F9\u7736\u7BD9\u8019\u80F0\u80F3\u812F\u818A\u8200\u8214\u8638\u869C\u86C0\u86C6\u86D4\u87C6\u88B1\u8902\u8C7A\u8E4B\u9119", + "\u67D2\u6ED3\u87C0\u87CB\u8DDB\u901E\u9163", + "\u4F5B\u52D2\u54C8\u62FF\u66FC\u6D59\u704C\u7586\u9ECE", + "\u4E48\u4EFF\u4F19\u4FF1\u5021\u5077\u5195\u5212\u5269\u5401\u541E\u5427\u54EA\u5587\u558A\u55BB\u566A\u573E\u574E\u5783\u57AE\u584C\u58E4\u5960\u5976\u59CA\u5A1C\u5DE2\u5F99\u600E\u6015\u6263\u626D\u6293\u62C6\u62D6\u62EF\u62F1\u6316\u632A\u6380\u6389\u63D2\u641E\u64C5\u64CE\u65F1\u6664\u6735\u6770\u67EC\u6846\u684C\u68AD\u6B47\u6B49\u6B67\u6C1B\u6C27\u6C2F\u6C5B\u6C89\u6DF9\u6EAF\u70AE\u70E4\u731C\u7334\u73BB\u7470\u76FC\u788E\u789F\u78B0\u78B3\u7A0D\u7A3B\u7A57\u7CB9\u7F69\u8335\u8354\u84BF\u8DCC\u8DD1\u904F\u90A8\u9189\u9677\u9738\u978B", + "\u5162\u53E8\u542D\u5501\u552C\u5639\u563F\u56B7\u6043\u60B4\u6194\u61CA\u634E\u63CD\u6414\u64AC\u6DAE\u6E43\u6F66\u7095\u7316\u733E\u7728\u7830\u78D5\u7ABF\u7FE9\u8018\u80EF\u8198\u8693\u86AA\u86AF\u874C\u8783\u879F\u8892\u8E6C", + "\u4FD8\u4FEF\u501A\u5085\u5180\u526A\u5323\u54ED\u5634\u56CA\u58A9\u58F9\u5955\u5978\u59DA\u5A49\u5B55\u5BC7\u5BE8\u5D4C\u5E62\u6467\u64BC\u6500\u655E\u6572\u658C\u6670\u68CD\u68D5\u68E0\u6912\u6A0A\u6BB7\u6C9B\u6D3D\u6DC6\u6E23\u6F8E\u7011\u7092\u714C\u73AB\u7405\u7624\u76D2\u7960\u79C9\u7A20\u7BF7\u7F50\u804A\u8086\u81C2\u8292\u82DE\u852C\u857E\u859B\u8760\u8C6B\u8DBE\u8E48\u8F9F\u96A7", + "\u4E8E\u5DF2\u5FB7\u7AD9", + "\u4E58\u4ECD\u4EFD\u4F30\u4F60\u4F69\u503C\u5047\u51B0\u51F0\u5361\u5377\u53E6\u54E5\u552E\u5708\u5740\u5761\u57C3\u5821\u589E\u5979\u59C6\u5B69\u5B83\u5E15\u5E76\u5F17\u5F88\u6208\u622A\u624E\u627E\u62D4\u62DC\u63ED\u641C\u6536\u6548\u65C1\u665A\u6668\u67E5\u6B65\u6BCF\u6C61\u6CDB\u6D4E\u6D89\u6DB5\u6E38\u6EAA\u6FB3\u70B8\u745F\u7538\u7A97\u7F3A\u7F55\u805A\u8258\u827E\u82AC\u8303\u83F2\u8482\u85CF\u8DDF\u903E\u9080\u970D\u9760\u9ED1\u9ED8", + "\u634F\u6518\u7B50\u809B", + "\u54A7\u57C2\u5AB3\u60CB\u6886\u8378\u85D0\u8671", + "\u5080\u5121\u51A4\u54AC\u55DC\u592D\u5DEB\u6292\u68D8\u69B4\u6A59\u6E24\u7FC5\u80DA\u8180\u86DB\u8700\u8DCB\u9761", + "\u4E30\u51E0\u542C\u613F", + "\u4E56\u547B\u55FD\u5C41\u606C\u6115\u6CAE\u7119\u795F\u7CDC\u86C9\u86F9\u8713\u873B\u8757\u8925\u892A\u96F9", + "\u51B2\u5308\u5398\u54B8\u59DC\u5C4F\u5D14\u5F6D\u60E0\u6241\u6350\u699C\u6BEB\u6C6A\u6CC4\u6DEE\u6F58\u6F6D\u7199\u77EE\u7ADF\u8058\u820D\u8212\u8389\u8587\u884D\u8881\u8FA8\u8FF9\u96D5", + "\u574F\u6251\u6302", + "\u52FA\u5544\u60F0\u6994\u86A4\u86E4", + "\u4E59\u4E7E\u4EAD\u4EF0\u4EF2\u4F0F\u4F10\u4FAF\u4FCA\u500D\u501F\u5076\u508D\u50E7\u5112\u5146\u5192\u51AC\u51DD\u51FD\u5200\u5237\u524A\u52A3\u52C3\u52C7\u52DF\u5351\u5352\u5353\u5378\u537F\u53E5\u5439\u54FA\u574A\u5782\u57CB\u5893\u58C1\u5915\u5937\u5949\u5951\u5974\u59B9\u5A18\u5A5A\u5ACC\u5B54\u5B5D\u5B64\u5B8F\u5BBF\u5BD2\u5C3A\u5C6F\u5CB3\u5D07\u5DE7\u5E84\u5E8A\u5F26\u5F69\u5F70\u5F90\u5FAA\u5FCD\u6012\u6016\u602A\u60A0\u60B2\u60BC\u6148\u6162\u6170\u6291\u6298\u62AB\u62BC\u62BD\u62D2\u62D3\u62D8\u62F3\u6311\u638C\u6398\u63E1\u642C\u6458\u64A4\u654F\u656C\u659C\u65E2\u65E8\u65EC\u6606\u6614\u6676\u6691\u6696\u66F9\u6749\u676F\u679A\u679D\u67CF\u67D4\u67F1\u67F3\u67F4\u6817\u6842\u6843\u6851\u68A8\u68CB\u68D2\u6B20\u6B32\u6BBF\u6C57\u6C88\u6CCA\u6D17\u6D1E\u6D69\u6D6E\u6D78\u6DE1\u6DFB\u6E58\u6EB6\u6F0F\u6F20\u7070\u708E\u70AD\u7126\u718A\u71C3\u7267\u72C2\u731B\u7384\u73A9\u73CD\u7434\u75AB\u75DB\u76C6\u76FE\u773C\u7891\u78C1\u795D\u7965\u79D2\u79DF\u79E6\u7A00\u7B11\u7B51\u7B54\u7C89\u7C92\u7CD6\u7D2B\u7F8A\u7FBD\u7FFC\u8010\u80A5\u80CE\u8150\u8179\u819C\u8247\u829D\u82B3\u82D7\u82E6\u8302\u8336\u8352\u83CA\u83CC\u83DC\u845B\u846C\u84B2\u84B8\u84C4\u8584\u864E\u86C7\u8861\u8863\u8870\u888B\u8896\u88D5\u8986\u8C46\u8DA3\u8E0F\u8F9B\u8FC5\u8FEB\u8FF7\u9003\u9006\u902E\u9042\u9063\u90ED\u963B\u9676\u96EA\u9756\u9B3C\u9B42\u9F3B", + "\u4E01\u4E03\u4E45\u4E5D\u4E88\u4E92\u4EA1\u4ECB\u4EE4\u4F01\u4F0A\u4F2F\u4F3C\u4F4E\u4F4F\u4F55\u4F8B\u4F9D\u4FBF\u4FEE\u505C\u50CF\u516B\u516D\u5175\u5177\u5178\u5207\u520A\u5224\u526F\u529F\u52A9\u5343\u5348\u535A\u5370\u53BB\u53CB\u53F3\u5409\u542B\u544A\u547C\u5584\u5747\u5802\u590F\u592B\u5931\u5947\u597D\u5A01\u5A92\u5B63\u5B8C\u5B97\u5BA2\u5BA3\u5BA4\u5BB3\u5BB9\u5BC6\u5BCC\u5BDF\u5C04\u5C1A\u5C45\u5C4B\u5CB8\u5DE6\u5E0C\u5E1D\u5E2D\u5E55\u5E8F\u5E95\u5E97\u5EA7\u5EB7\u5EF6\u5F8B\u5FAE\u5FC5\u5FD7\u5FF5\u601D\u6025\u606F\u60F3\u611F\u623F\u6253\u6279\u627F\u6295\u6297\u62EC\u6388\u6392\u63F4\u6545\u6551\u6574\u6599\u65C5\u65E9\u6613\u6620\u6625\u666E\u666F\u66B4\u66F4\u670D\u671B\u6728\u672B\u6751\u677E\u67B6\u6838\u6839\u6848\u68EE\u690D\u6982\u6A21\u6B4C\u6B62\u6B66\u6BB5\u6BCD\u6C0F\u6C38\u6C42\u6CBF\u6CE2\u6CE8\u6D0B\u6D3E\u6D88\u6DF1\u6E05\u6E56\u706B\u7167\u7206\u7236\u7247\u7387\u7530\u7537\u7559\u7565\u7591\u75C5\u767B\u767D\u767E\u7687\u76DB\u76DF\u771F\u7763\u77ED\u7834\u79FB\u7A81\u7AE0\u7AEF\u7B56\u7B97\u7C4D\u7CBE\u7D20\u7D22\u7F72\u7FA4\u8001\u8003\u81F4\u822A\u826F\u82B1\u8349\u843D\u878D\u8857\u89D2\u8B66\u8C37\u8D70\u8D85\u8D8A\u8DB3\u8FF0\u8FFD\u9001\u901F\u90A3\u90A6\u914D\u91CE\u9632\u963F\u9644\u964D\u9664\u96C4\u96E8\u9752\u9769\u98DF", + "\u4E09\u4E0A\u4E0B\u4E0D\u4E16\u4E3B\u4E8B\u4E8C\u4EE3\u4EE5\u4F4D\u4F5C\u4F7F\u5165\u5168\u516C\u5171\u51FA\u5206\u5229\u5236\u524D\u529B\u52A0\u5316\u5317\u5357\u539F\u53CA\u53F0\u5408\u540C\u540D\u548C\u5730\u57FA\u5916\u591A\u5929\u5B50\u5B9A\u5BB6\u5C0F\u5C71\u5DDE\u5DE5\u5E02\u5E73\u5EA6\u5EFA\u5F0F\u6027\u6210\u6240\u6307\u653F\u6587\u65B0\u65B9\u660E\u6700\u6709\u671F\u672C\u6B21\u6B63\u6C11\u6CBB\u6CD5\u6D77\u7269\u7279\u7406\u751F\u7528\u7531\u754C\u76EE\u76F8\u793E\u79D1\u7ACB\u7B2C\u7B49\u7CFB\u8005\u80FD\u81EA\u82F1\u884C\u8868\u897F\u8981\u901A\u9053\u90E8\u90FD\u91CD\u9AD8", + "\u4E4D\u4F36\u5319\u6A61\u6DCB\u7194", + "\u4E5E\u4F43\u5026\u50FB\u515C\u5243\u5420\u5446\u54B3\u54BD\u553E\u55A7\u5703\u5984\u5AC9\u5B09\u5C51\u5DFE\u5ED3\u5F1B\u6055\u618E\u62D9\u65A7\u6652\u6977\u6EBA\u707C\u75D8\u79E4\u7AFF\u7B4F\u7CA5\u808B\u8098\u80B4\u8235\u82DB\u849C\u8549\u868A\u86FE\u8718\u914C", + "\u4E08\u4E38\u4F8D\u50DA\u5203\u5256\u52C9\u52D8\u52FE\u5320\u533F\u5375\u53D4\u540F\u54E8\u56DA\u5806\u5996\u5999\u59A5\u59A8\u59FF\u5AE1\u5BB0\u5BF8\u5C09\u5C3F\u5C48\u5C65\u5D29\u5E06\u5E4C\u5EB5\u5EB6\u5EB8\u5F13\u5FCC\u5FD8\u6052\u606D\u609F\u60D1\u614E\u6247\u62B1\u6349\u64E6\u6577\u65ED\u6674\u6734\u67C4\u6850\u690E\u6A58\u6B3A\u6B89\u6C41\u6CBC\u6CCC\u6CF3\u6D74\u6DAF\u6DF3\u6ECB\u6F02\u6F84\u71E5\u7261\u7272\u72AC\u72FC\u733F\u7409\u755C\u76F2\u7720\u77AC\u77E2\u7802\u786B\u78E8\u7901\u7948\u79E9\u7A1A\u7A74\u7AE3\u7B4B\u7B52\u7BB1\u7C3F\u8015\u8096\u809D\u80A2\u80A9\u80AA\u80BA\u80F8\u8102\u810A\u8154\u8155\u8170\u817A\u81A8\u81ED\u820C\u8236\u82BD\u8305\u83E9\u83F1\u840C\u85FB\u8650\u8702\u8A93\u8E44\u8FB0\u9038\u9091\u90AA\u916C\u9175\u9177\u9685\u96C0\u96C7\u96CC\u97AD", + "\u63D6\u803D", + "\u602F\u7566", + "\u634C\u7C38", + "\u4E18\u4E73\u4E95\u4EAB\u4EC1\u4ED8\u4ED9\u4F11\u4F34\u4F38\u4F59\u4FB5\u4FC3\u4FD7\u5012\u5019\u5065\u50AC\u5144\u5145\u514D\u517C\u51A0\u51B7\u5211\u5238\u523A\u523B\u5272\u52E4\u5360\u5371\u539A\u541B\u5426\u5438\u5473\u54F2\u5510\u552F\u5531\u559C\u5609\u56F0\u56FA\u591C\u5948\u594F\u59BB\u59D3\u5B85\u5B87\u5B88\u5B99\u5B9C\u5BC4\u5BFA\u5C0A\u5C3E\u5CA9\u5D0E\u5DE1\u5DE8\u5DEE\u5DF1\u5E45\u5E78\u5E7B\u5E7C\u5EAD\u5EF7\u5F1F\u5F31\u5F79\u5F7C\u5F85\u5F92\u5FA1\u5FE0\u6050\u60A3\u6212\u62DB\u632F\u6355\u63A2\u63AA\u63CF\u642D\u6469\u64CD\u653B\u6563\u660C\u662D\u667A\u6697\u66FF\u6750\u675F\u677F\u6790\u67D3\u682A\u6885\u68B0\u6B8A\u6B96\u6BDB\u6C60\u6CB9\u6CC9\u6D25\u6D66\u6DB2\u6DF7\u6E21\u6ED1\u6F2B\u6F6E\u6FC0\u7235\u725B\u72AF\u7389\u7532\u7533\u756A\u75BE\u75C7\u76AE\u76CA\u7740\u786C\u7956\u7968\u796D\u7981\u79C0\u79C1\u79CB\u79D8\u7A3F\u7AE5\u7AF9\u7E41\u7F6A\u7FFB\u8089\u80CC\u80DE\u81E3\u821E\u8239\u82E5\u8328\u8377\u85E4\u8840\u88C1\u88C2\u8C6A\u8D64\u8DDD\u8FCE\u8FD4\u9000\u9014\u907F\u90CA\u90CE\u90E1\u9152\u9178\u9686\u9694\u969C\u9707\u9732\u9AA8\u9B54\u9E7F\u9EBB", + "\u4E39\u4E43\u4EAE\u4F73\u504F\u505A\u51C6\u51CC\u52AA\u5339\u5347\u53EB\u53EC\u5448\u5766\u57F9\u5854\u585E\u58A8\u5B8B\u5C01\u5CF0\u5E72\u5EC9\u5F80\u5F81\u5FBD\u5FEB\u6069\u6211\u624D\u628A\u62B5\u62CD\u6309\u63A7\u64AD\u6566\u6597\u65CB\u65D7\u6628\u6717\u6731\u674E\u675C\u683D\u6881\u6B3E\u6BD2\u6C7D\u6C99\u6CE5\u6CF0\u6D1B\u6D2A\u70C8\u719F\u724C\u7259\u73E0\u73ED\u745E\u74E6\u7518\u751A\u7686\u770B\u7B26\u8033\u80A1\u80E1\u821F\u83AB\u8499\u8D74\u8DE8\u900F\u9010\u9047\u904D\u906D\u9675\u96C5\u96F6\u96F7\u9700\u9F13", + "\u5764\u59D0\u5A03\u6062\u6108\u68C9\u7164\u79BE\u7BAD\u903C", + "\u4EA5\u50B2\u532A\u5366\u543B\u54E9\u5632\u59D1\u5BB5\u5DF7\u5F6A\u5F6C\u5FFD\u6070\u6168\u61BE\u63A0\u63A9\u6478\u65A4\u68A7\u6A1F\u6CAB\u70F9\u711A\u723D\u7262\u72F8\u751C\u754F\u75B9\u76C8\u7709\u7897\u7CCA\u7F9E\u8299\u82AD\u82B9\u82D4\u8304\u84C9\u84EC\u854A\u85AF\u86D9\u8FA3\u9187\u97A0", + "\u4E14\u4E5F\u4F46\u514B\u5176\u5230\u5373\u53EA\u540E\u5982\u5C3C\u5DF4\u6216\u62C9\u65AF\u66FE\u6B64\u6D32\u6D6A\u7BC7\u800C", + "\u4E4E\u4E9B\u4EA6\u4EC0\u4FC4\u5403\u5957\u5C24\u6089\u6258\u67D0\u758F\u7FF0\u8D6B", + "\u4FAE\u5944\u5A29\u6101\u62ED\u6328\u637B\u6666\u6687\u66AE\u673D\u6756\u67FF\u6813\u68A2\u699B\u7078\u708A\u7396\u7422\u7525\u75E2\u76BF\u7766\u77B3\u7A3C\u7A92\u819D\u81FC\u8237\u8338\u8511\u88F3\u8FC2", + "\u4E11\u4F3A\u4F51\u5197\u51B6\u51F9\u52FF\u541F\u5507\u5589\u5993\u5A7F\u5AC1\u5B9B\u5BC2\u5BE1\u5F04\u5F0A\u5F27\u6020\u6028\u6068\u6094\u6109\u611A\u614C\u621A\u62B9\u62D0\u62F7\u62FE\u632B\u633D\u6367\u660F\u6627\u6643\u66D9\u674F\u6795\u67AF\u67D1\u6876\u68DA\u68FA\u6905\u69FD\u6A80\u6B6A\u6CB8\u6CE3\u6DD1\u6DEB\u6E9C\u6EA2\u6EF4\u6F06\u714E\u716E\u722A\u7280\u74A7\u752B\u75B2\u75D5\u75F4\u77AD\u77E9\u785D\u79BD\u7A3D\u7A9F\u7B1B\u7B95\u7C9F\u7CDF\u80C3\u8106\u817F\u818F\u81B3\u828B\u82A5\u82AF\u840E\u851A\u853D\u8776\u87F9\u8877\u8910\u8912\u8C79\u8D66\u8FB1\u9017\u90C1\u916A\u9699\u96C1\u971C\u9774\u978D", + "\u4E4B\u4E86\u4E94\u4EA4\u4EAC\u4ECA\u4ED6\u4EF6\u4EFB\u4F9B\u4FDD\u4FE1\u5143\u5148\u5149\u518D\u5217\u521D\u5305\u5341\u534A\u53C8\u53CD\u53D6\u53D7\u53E3\u53E4\u53EF\u53F2\u53F8\u5404\u5411\u5468\u547D\u54C1\u5546\u5668\u56DB\u56DE\u56E0\u571F\u578B\u57CE\u57DF\u5883\u58EB\u592A\u592E\u5973\u59CB\u59D4\u5B57\u5B58\u5B89\u5B98\u5C11\u5C31\u5C40\u5C55\u5DDD\u5E03\u5E38\u5E9C\u5F15\u5F62\u5F71\u5F97\u5FC3\u60C5\u610F\u624B\u6280\u6301\u63A5\u63A8\u63D0\u652F\u6539\u653E\u6559\u65BD\u65CF\u661F\u66F2\u671D\u672A\u6797\u679C\u6821\u683C\u6B7B\u6BD4\u6C34\u6C5F\u6CB3\u6D3B\u6D41\u6E2F\u6E90\u6F14\u7136\u7248\u738B\u7403\u76F4\u7701\u77E5\u77F3\u7814\u793A\u795E\u798F\u7A0B\u7A76\u7A7A\u7BA1\u7C73\u7F6E\u7F8E\u80B2\u81F3\u822C\u8272\u8457\u88AB\u89E3\u8A00\u8C61\u8D77\u8DEF\u8EAB\u8FD1\u9020\u91CC\u91CF\u91D1\u9650\u9662\u96C6\u975E\u9762\u97F3\u9996\u9999", + "\u55C5\u57A2\u58D5\u59E5\u637A\u74E2\u7CE0\u895F", + "\u4E19\u4E32\u4E4F\u4E91\u4EC7\u4ED4\u4F0D\u5141\u51E1\u51F6\u51F8\u52AB\u535C\u53C9\u53DB\u540A\u5410\u54C0\u559D\u5750\u5751\u576A\u57E0\u5824\u582A\u5830\u5835\u5851\u5858\u586B\u5954\u59FB\u5A46\u5B5F\u5BB4\u5BD3\u5C16\u5C60\u5CFB\u5D16\u5E16\u5E3D\u5E7D\u5E87\u5ECA\u5FD9\u60DC\u60F9\u6155\u6167\u6234\u626E\u6276\u6284\u633A\u6377\u6492\u649E\u64B0\u6562\u6591\u65A5\u65E6\u65FA\u6602\u670B\u676D\u68AF\u695A\u6B23\u6BC5\u6C70\u6C83\u6CE1\u6D8C\u6DD8\u6E20\u71D5\u72D0\u72D7\u73B2\u73CA\u7433\u7483\u74DC\u74F6\u7554\u764C\u7761\u77DB\u78A7\u7A46\u7A7F\u7A84\u7C97\u7D2F\u7FC1\u7FE0\u8000\u8017\u808C\u80AF\u8404\u8461\u8463\u8475\u8513\u85AA\u8679\u86CB\u871C\u87BA\u88F8\u8C8C\u8DF3\u8FC4\u901D\u9022\u906E\u9075\u9192\u91C7\u966A\u971E\u9910\u9B41\u9F0E\u9F20" + }; + + + static { + for (String cjk_list : CJK_CLASS) { + char representative = cjk_list.charAt(0); + for (int i = 0; i < cjk_list.length(); ++i) { + cjk_map.put(cjk_list.charAt(i), representative); + } + } + } +} diff --git a/src/main/java/org/xbib/elasticsearch/index/mapper/langdetect/LangdetectMapper.java b/src/main/java/org/xbib/elasticsearch/index/mapper/langdetect/LangdetectMapper.java index 23b5903..dc83a1b 100644 --- a/src/main/java/org/xbib/elasticsearch/index/mapper/langdetect/LangdetectMapper.java +++ b/src/main/java/org/xbib/elasticsearch/index/mapper/langdetect/LangdetectMapper.java @@ -1,7 +1,11 @@ package org.xbib.elasticsearch.index.mapper.langdetect; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.fielddata.FieldDataType; import org.elasticsearch.index.mapper.FieldMapperListener; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperParsingException; @@ -9,10 +13,11 @@ import org.elasticsearch.index.mapper.MergeMappingException; import org.elasticsearch.index.mapper.ObjectMapperListener; import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.core.AbstractFieldMapper; import org.elasticsearch.index.mapper.core.StringFieldMapper; -import org.xbib.elasticsearch.common.langdetect.Detector; -import org.xbib.elasticsearch.common.langdetect.Language; -import org.xbib.elasticsearch.common.langdetect.LanguageDetectionException; +import org.xbib.elasticsearch.module.langdetect.LangdetectService; +import org.xbib.elasticsearch.index.analysis.langdetect.Language; +import org.xbib.elasticsearch.index.analysis.langdetect.LanguageDetectionException; import java.io.IOException; import java.nio.charset.Charset; @@ -21,21 +26,22 @@ import static org.elasticsearch.index.mapper.MapperBuilders.stringField; -public class LangdetectMapper implements Mapper { +public class LangdetectMapper extends AbstractFieldMapper { public static final String CONTENT_TYPE = "langdetect"; - public static class Builder extends Mapper.Builder { + public static class Builder extends AbstractFieldMapper.Builder { private StringFieldMapper.Builder contentBuilder; - private StringFieldMapper.Builder langBuilder = stringField("lang"); - private Detector detector; + private StringFieldMapper.Builder langBuilder; + private ImmutableSettings.Builder settingsBuilder; - public Builder(String name, Detector detector) { - super(name); - this.detector = detector; - this.contentBuilder = stringField(name); + public Builder(String name) { + super(name, new FieldType(Defaults.FIELD_TYPE)); this.builder = this; + this.contentBuilder = stringField(name); + this.langBuilder = stringField("lang"); + this.settingsBuilder = ImmutableSettings.settingsBuilder(); } public Builder content(StringFieldMapper.Builder content) { @@ -48,110 +54,227 @@ public Builder lang(StringFieldMapper.Builder lang) { return this; } + public Builder ntrials(int trials) { + settingsBuilder.put("number_of_trials", trials); + return this; + } + + public Builder alpha(double alpha) { + settingsBuilder.put("alpha", alpha); + return this; + } + + public Builder alphaWidth(double alphaWidth) { + settingsBuilder.put("alpha_width", alphaWidth); + return this; + } + + public Builder iterationLimit(int iterationLimit) { + settingsBuilder.put("iteration_limit", iterationLimit); + return this; + } + + public Builder probThreshold(double probThreshold) { + settingsBuilder.put("prob_threshold", probThreshold); + return this; + } + + public Builder convThreshold(double convThreshold) { + settingsBuilder.put("conv_threshold", convThreshold); + return this; + } + + public Builder baseFreq(int baseFreq) { + settingsBuilder.put("base_freq", baseFreq); + return this; + } + + public Builder pattern(String pattern) { + settingsBuilder.put("pattern", pattern); + return this; + } + + public Builder max(int max) { + settingsBuilder.put("max", max); + return this; + } + + public Builder binary(boolean binary) { + settingsBuilder.put("binary", binary); + return this; + } + + public Builder map(Map map) { + for (String key : map.keySet()) { + settingsBuilder.put("map." + key, map.get(key)); + } + return this; + } + + public Builder languages(List languages) { + settingsBuilder.putArray("languages", languages.toArray(new String[languages.size()])); + return this; + } + @Override public LangdetectMapper build(BuilderContext context) { context.path().add(name); StringFieldMapper contentMapper = contentBuilder.build(context); StringFieldMapper langMapper = langBuilder.build(context); context.path().remove(); - return new LangdetectMapper(name, detector, contentMapper, langMapper); + LangdetectService detector = new LangdetectService(settingsBuilder.build()); + detector.start(); + return new LangdetectMapper(new Names(name), contentMapper, langMapper, detector); } } public static class TypeParser implements Mapper.TypeParser { - private Detector detector; - - public TypeParser(Detector detector) { - this.detector = detector; - } - @SuppressWarnings({"unchecked", "rawtypes"}) @Override public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { - LangdetectMapper.Builder builder = new Builder(name, detector); + LangdetectMapper.Builder builder = new Builder(name); for (Map.Entry entry : node.entrySet()) { String fieldName = entry.getKey(); Object fieldNode = entry.getValue(); - - if (fieldName.equals("fields")) { - Map fieldsNode = (Map) fieldNode; - for (Map.Entry fieldsEntry : fieldsNode.entrySet()) { - String propName = fieldsEntry.getKey(); - Object propNode = fieldsEntry.getValue(); - - if (name.equals(propName)) { - builder.content((StringFieldMapper.Builder) parserContext.typeParser("string").parse(name, - (Map) propNode, parserContext)); - } else if ("lang".equals(propName)) { - builder.lang((StringFieldMapper.Builder) parserContext.typeParser("string").parse("lang", - (Map) propNode, parserContext)); + switch (fieldName) { + case "fields": { + Map fieldsNode = (Map) fieldNode; + for (Map.Entry fieldsEntry : fieldsNode.entrySet()) { + String propName = fieldsEntry.getKey(); + Object propNode = fieldsEntry.getValue(); + if (name.equals(propName)) { + builder.content((StringFieldMapper.Builder) parserContext.typeParser("string").parse(name, + (Map) propNode, parserContext)); + } else if ("lang".equals(propName)) { + builder.lang((StringFieldMapper.Builder) parserContext.typeParser("string").parse("lang", + (Map) propNode, parserContext)); + } } + break; + } + case "number_of_trials": { + builder.ntrials((Integer)fieldNode); + break; + } + case "alpha": { + builder.alpha((Double)fieldNode); + break; + } + case "alpha_width": { + builder.alphaWidth((Double)fieldNode); + break; + } + case "iteration_limit": { + builder.iterationLimit((Integer)fieldNode); + break; + } + case "prob_threshold": { + builder.probThreshold((Double)fieldNode); + break; + } + case "conv_threshold": { + builder.convThreshold((Double)fieldNode); + break; + } + case "base_freq": { + builder.baseFreq((Integer)fieldNode); + break; + } + case "pattern": { + builder.pattern((String)fieldNode); + break; + } + case "max": { + builder.max((Integer)fieldNode); + break; + } + case "binary": { + builder.binary((Boolean)fieldNode); + break; + } + case "map" : { + builder.map((Map)fieldNode); + break; + } + case "languages" : { + builder.languages((List)fieldNode); + break; } } } - return builder; } } - private final String name; - private final Detector detector; private final StringFieldMapper contentMapper; + private final StringFieldMapper langMapper; - public LangdetectMapper(String name, Detector detector, StringFieldMapper contentMapper, StringFieldMapper langMapper) { - this.name = name; - this.detector = detector; + private final LangdetectService detector; + + public LangdetectMapper(Names names, StringFieldMapper contentMapper, StringFieldMapper langMapper, + LangdetectService detector) { + super(names, 1.0f, Defaults.FIELD_TYPE, false, null, null, null, null, null, null, null, null, null, null); this.contentMapper = contentMapper; this.langMapper = langMapper; + this.detector = detector; + } + + @Override + public FieldType defaultFieldType() { + return Defaults.FIELD_TYPE; + } + + @Override + public FieldDataType defaultFieldDataType() { + return null; } @Override - public String name() { - return name; + public Object value(Object value) { + return null; } @Override public void parse(ParseContext context) throws IOException { String content = null; - XContentParser parser = context.parser(); XContentParser.Token token = parser.currentToken(); - if (token == XContentParser.Token.VALUE_STRING) { - // try decode UTF-8 base64 (e.g. from attachment mapper plugin) content = parser.text(); - try { - byte[] b = parser.binaryValue(); - if (b != null && b.length > 0) { - content = new String(b, Charset.forName("UTF-8")); + if (detector.getSettings().getAsBoolean("binary", false)) { + try { + byte[] b = parser.binaryValue(); + if (b != null && b.length > 0) { + content = new String(b, Charset.forName("UTF-8")); + } + } catch (Exception e) { } - } catch (Exception e) { - // ignore } } - - context.externalValue(content); - contentMapper.parse(context); - if (content == null) { return; } - + context = context.createExternalValueContext(content); + contentMapper.parse(context); try { List langs = detector.detectAll(content); for (Language lang : langs) { - context.externalValue(lang.getLanguage()); + context = context.createExternalValueContext(lang.getLanguage()); langMapper.parse(context); } } catch (LanguageDetectionException e) { - context.externalValue("unknown"); + context = context.createExternalValueContext("unknown"); langMapper.parse(context); } } + @Override + protected void parseCreateField(ParseContext context, List fields) throws IOException { + } + @Override public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappingException { } @@ -174,7 +297,7 @@ public void close() { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(name); + builder.startObject(name()); builder.field("type", CONTENT_TYPE); builder.startObject("fields"); @@ -185,4 +308,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endObject(); return builder; } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + } \ No newline at end of file diff --git a/src/main/java/org/xbib/elasticsearch/module/langdetect/LangdetectModule.java b/src/main/java/org/xbib/elasticsearch/module/langdetect/LangdetectModule.java index 7c44796..b122067 100644 --- a/src/main/java/org/xbib/elasticsearch/module/langdetect/LangdetectModule.java +++ b/src/main/java/org/xbib/elasticsearch/module/langdetect/LangdetectModule.java @@ -2,7 +2,6 @@ import org.elasticsearch.common.inject.Binder; import org.elasticsearch.common.inject.Module; -import org.xbib.elasticsearch.plugin.langdetect.RegisterLangdetectType; public class LangdetectModule implements Module { diff --git a/src/main/java/org/xbib/elasticsearch/module/langdetect/LangdetectService.java b/src/main/java/org/xbib/elasticsearch/module/langdetect/LangdetectService.java new file mode 100644 index 0000000..4d8d61d --- /dev/null +++ b/src/main/java/org/xbib/elasticsearch/module/langdetect/LangdetectService.java @@ -0,0 +1,347 @@ +package org.xbib.elasticsearch.module.langdetect; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.component.AbstractLifecycleComponent; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; +import org.xbib.elasticsearch.index.analysis.langdetect.LangProfile; +import org.xbib.elasticsearch.index.analysis.langdetect.Language; +import org.xbib.elasticsearch.index.analysis.langdetect.LanguageDetectionException; +import org.xbib.elasticsearch.index.analysis.langdetect.NGram; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.regex.Pattern; + +public class LangdetectService extends AbstractLifecycleComponent { + + private final static Pattern word = Pattern.compile("[\\P{IsWord}]", Pattern.UNICODE_CHARACTER_CLASS); + + private final static String[] DEFAULT_LANGUAGES = new String[] { + "af", + "ar", + "bg", + "bn", + "cs", + "da", + "de", + "el", + "en", + "es", + "et", + "fa", + "fi", + "fr", + "gu", + "he", + "hi", + "hr", + "hu", + "id", + "it", + "ja", + "kn", + "ko", + "lt", + "lv", + "mk", + "ml", + "mr", + "ne", + "nl", + "no", + "pa", + "pl", + "pt", + "ro", + "ru", + "sk", + "sl", + "so", + "sq", + "sv", + "sw", + "ta", + "te", + "th", + "tl", + "tr", + "uk", + "ur", + "vi", + "zh-cn", + "zh-tw" + }; + + private Map wordLangProbMap = new HashMap(); + + private List langlist = new LinkedList(); + + private Map langmap = new HashMap(); + + private double alpha; + + private double alpha_width; + + private int n_trial; + + private double[] priorMap; + + private int iteration_limit; + + private double prob_threshold; + + private double conv_threshold; + + private int base_freq; + + private Pattern filterPattern; + + @Inject + public LangdetectService(Settings settings) { + super(settings); + } + + @Override + protected void doStart() throws ElasticsearchException { + load(settings); + this.priorMap = null; + this.n_trial = settings.getAsInt("number_of_trials", 7); + this.alpha = settings.getAsDouble("alpha", 0.5); + this.alpha_width = settings.getAsDouble("alpha_width", 0.05); + this.iteration_limit = settings.getAsInt("iteration_limit", 10000); + this.prob_threshold = settings.getAsDouble("prob_threshold", 0.1); + this.conv_threshold = settings.getAsDouble("conv_threshold", 0.99999); + this.base_freq = settings.getAsInt("base_freq", 10000); + this.filterPattern = settings.get("pattern") != null ? + Pattern.compile(settings.get("pattern"),Pattern.UNICODE_CHARACTER_CLASS) : null; + } + + @Override + protected void doStop() throws ElasticsearchException { + } + + @Override + protected void doClose() throws ElasticsearchException { + } + + public Settings getSettings() { + return settings; + } + + private void load(Settings settings) { + try { + String[] keys = DEFAULT_LANGUAGES; + if (settings.get("languages") != null) { + keys = settings.get("languages").split(","); + } + int index = 0; + int size = keys.length; + for (String key : keys) { + if (key != null && !key.isEmpty()) { + loadProfileFromResource(key, index++, size); + } + } + logger.debug("language detection service installed for {}", langlist); + } catch (Exception e) { + logger.error(e.getMessage(), e); + throw new ElasticsearchException(e.getMessage()); + } + try { + // map by settings + Settings map = ImmutableSettings.EMPTY; + if (settings.getByPrefix("map.") != null) { + map = ImmutableSettings.settingsBuilder().put(settings.getByPrefix("map.")).build(); + } + if (map.getAsMap().isEmpty()) { + // is in "map" a resource name? + String s = settings.get("map") != null ? settings.get("map") : "/langdetect/language.json"; + InputStream in = getClass().getResourceAsStream(s); + if (in != null) { + map = ImmutableSettings.settingsBuilder().loadFromStream(s, in).build(); + } + } + this.langmap = map.getAsMap(); + } catch (Exception e) { + logger.error(e.getMessage(), e); + throw new ElasticsearchException(e.getMessage()); + } + } + + public void loadProfileFromResource(String resource, int index, int langsize) throws IOException { + InputStream in = getClass().getResourceAsStream("/langdetect/" + resource); + if (in == null) { + throw new IOException("profile '" + resource + "' not found"); + } + ObjectMapper mapper = new ObjectMapper(); + LangProfile profile = mapper.readValue(in, LangProfile.class); + addProfile(profile, index, langsize); + } + + public void addProfile(LangProfile profile, int index, int langsize) throws IOException { + String lang = profile.getName(); + if (langlist.contains(lang)) { + throw new IOException("duplicate of the same language profile: " + lang); + } + langlist.add(lang); + for (String word : profile.getFreq().keySet()) { + if (!wordLangProbMap.containsKey(word)) { + wordLangProbMap.put(word, new double[langsize]); + } + int length = word.length(); + if (length >= 1 && length <= 3) { + double prob = profile.getFreq().get(word).doubleValue() / profile.getNWords()[length - 1]; + wordLangProbMap.get(word)[index] = prob; + } + } + } + + /** + * Set prior information about language probabilities. + * + * @param priorMap the priorMap to set + * @throws org.xbib.elasticsearch.index.analysis.langdetect.LanguageDetectionException + */ + public void setPriorMap(HashMap priorMap) throws LanguageDetectionException { + this.priorMap = new double[langlist.size()]; + double sump = 0; + for (int i = 0; i < this.priorMap.length; ++i) { + String lang = langlist.get(i); + if (priorMap.containsKey(lang)) { + double p = priorMap.get(lang); + if (p < 0) { + throw new LanguageDetectionException("Prior probability must be non-negative"); + } + this.priorMap[i] = p; + sump += p; + } + } + if (sump <= 0) { + throw new LanguageDetectionException("More one of prior probability must be non-zero"); + } + for (int i = 0; i < this.priorMap.length; ++i) { + this.priorMap[i] /= sump; + } + } + + public List detectAll(String text) throws LanguageDetectionException { + List languages = new ArrayList(); + if (filterPattern != null && !filterPattern.matcher(text).matches()) { + return languages; + } + List list = new ArrayList(); + languages = sortProbability(languages, detectBlock(list, text)); + return languages.subList(0, Math.min(languages.size(), settings.getAsInt("max", languages.size()))); + } + + private double[] detectBlock(List list, String text) throws LanguageDetectionException { + // clean all non-work characters from text + text = text.replaceAll(word.pattern(), " "); + extractNGrams(list, text); + double[] langprob = new double[langlist.size()]; + if (list.isEmpty()) { + //throw new LanguageDetectionException("no features in text"); + return langprob; + } + Random rand = new Random(); + Long seed = 0L; + rand.setSeed(seed); + for (int t = 0; t < n_trial; ++t) { + double[] prob = initProbability(); + double a = this.alpha + rand.nextGaussian() * alpha_width; + for (int i = 0; ; ++i) { + int r = rand.nextInt(list.size()); + updateLangProb(prob, list.get(r), a); + if (i % 5 == 0) { + if (normalizeProb(prob) > conv_threshold || i >= iteration_limit) { + break; + } + } + } + for (int j = 0; j < langprob.length; ++j) { + langprob[j] += prob[j] / n_trial; + } + } + return langprob; + } + + private double[] initProbability() { + double[] prob = new double[langlist.size()]; + if (priorMap != null) { + System.arraycopy(priorMap, 0, prob, 0, prob.length); + } else { + for (int i = 0; i < prob.length; ++i) { + prob[i] = 1.0 / langlist.size(); + } + } + return prob; + } + + private void extractNGrams(List list, String text) { + NGram ngram = new NGram(); + for (int i = 0; i < text.length(); ++i) { + ngram.addChar(text.charAt(i)); + for (int n = 1; n <= NGram.N_GRAM; ++n) { + String w = ngram.get(n); + if (w != null && wordLangProbMap.containsKey(w)) { + list.add(w); + } + } + } + } + + private boolean updateLangProb(double[] prob, String word, double alpha) { + if (word == null || !wordLangProbMap.containsKey(word)) { + return false; + } + double[] langProbMap = wordLangProbMap.get(word); + double weight = alpha / base_freq; + for (int i = 0; i < prob.length; ++i) { + prob[i] *= weight + langProbMap[i]; + } + return true; + } + + private double normalizeProb(double[] prob) { + double maxp = 0, sump = 0; + for (double aProb : prob) { + sump += aProb; + } + for (int i = 0; i < prob.length; ++i) { + double p = prob[i] / sump; + if (maxp < p) { + maxp = p; + } + prob[i] = p; + } + return maxp; + } + + private List sortProbability(List list, double[] prob) { + for (int j = 0; j < prob.length; ++j) { + double p = prob[j]; + if (p > prob_threshold) { + for (int i = 0; i <= list.size(); ++i) { + if (i == list.size() || list.get(i).getProbability() < p) { + String code = langlist.get(j); + if (langmap != null && langmap.containsKey(code)) { + code = langmap.get(code); + } + list.add(i, new Language(code, p)); + break; + } + } + } + } + return list; + } + +} diff --git a/src/main/java/org/xbib/elasticsearch/plugin/langdetect/RegisterLangdetectType.java b/src/main/java/org/xbib/elasticsearch/module/langdetect/RegisterLangdetectType.java similarity index 72% rename from src/main/java/org/xbib/elasticsearch/plugin/langdetect/RegisterLangdetectType.java rename to src/main/java/org/xbib/elasticsearch/module/langdetect/RegisterLangdetectType.java index de86a23..6a119c3 100644 --- a/src/main/java/org/xbib/elasticsearch/plugin/langdetect/RegisterLangdetectType.java +++ b/src/main/java/org/xbib/elasticsearch/module/langdetect/RegisterLangdetectType.java @@ -1,4 +1,4 @@ -package org.xbib.elasticsearch.plugin.langdetect; +package org.xbib.elasticsearch.module.langdetect; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; @@ -6,15 +6,14 @@ import org.elasticsearch.index.Index; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.settings.IndexSettings; -import org.xbib.elasticsearch.common.langdetect.Detector; import org.xbib.elasticsearch.index.mapper.langdetect.LangdetectMapper; public class RegisterLangdetectType extends AbstractIndexComponent { @Inject public RegisterLangdetectType(Index index, @IndexSettings Settings indexSettings, - MapperService mapperService, Detector detector) { + MapperService mapperService) { super(index, indexSettings); - mapperService.documentMapperParser().putTypeParser("langdetect", new LangdetectMapper.TypeParser(detector)); + mapperService.documentMapperParser().putTypeParser("langdetect", new LangdetectMapper.TypeParser()); } } diff --git a/src/main/java/org/xbib/elasticsearch/plugin/langdetect/LangdetectPlugin.java b/src/main/java/org/xbib/elasticsearch/plugin/langdetect/LangdetectPlugin.java index 6f2ff15..a663655 100644 --- a/src/main/java/org/xbib/elasticsearch/plugin/langdetect/LangdetectPlugin.java +++ b/src/main/java/org/xbib/elasticsearch/plugin/langdetect/LangdetectPlugin.java @@ -7,8 +7,8 @@ import org.elasticsearch.rest.RestModule; import org.xbib.elasticsearch.action.langdetect.LangdetectAction; import org.xbib.elasticsearch.action.langdetect.TransportLangdetectAction; -import org.xbib.elasticsearch.common.langdetect.Detector; import org.xbib.elasticsearch.module.langdetect.LangdetectModule; +import org.xbib.elasticsearch.module.langdetect.LangdetectService; import org.xbib.elasticsearch.rest.action.langdetect.RestLangdetectAction; import java.util.Collection; @@ -41,7 +41,7 @@ public void onModule(ActionModule module) { @Override public Collection> services() { Collection> services = newArrayList(); - services.add(Detector.class); + services.add(LangdetectService.class); return services; } diff --git a/src/main/java/org/xbib/elasticsearch/rest/action/langdetect/RestLangdetectAction.java b/src/main/java/org/xbib/elasticsearch/rest/action/langdetect/RestLangdetectAction.java index 1488988..f0dcd38 100644 --- a/src/main/java/org/xbib/elasticsearch/rest/action/langdetect/RestLangdetectAction.java +++ b/src/main/java/org/xbib/elasticsearch/rest/action/langdetect/RestLangdetectAction.java @@ -18,14 +18,13 @@ public class RestLangdetectAction extends BaseRestHandler { @Inject public RestLangdetectAction(Settings settings, Client client, RestController controller) { - super(settings, client); + super(settings, controller, client); controller.registerHandler(POST, "/_langdetect", this); } @Override public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) { - LangdetectRequest langdetectRequest = new LangdetectRequest().setText(request.content()); + LangdetectRequest langdetectRequest = new LangdetectRequest().setText(request.content().toUtf8()); client.admin().indices().execute(LangdetectAction.INSTANCE, langdetectRequest, new RestStatusToXContentListener(channel)); - } } \ No newline at end of file diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/af b/src/main/resources/langdetect/af similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/af rename to src/main/resources/langdetect/af diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/ar b/src/main/resources/langdetect/ar similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/ar rename to src/main/resources/langdetect/ar diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/bg b/src/main/resources/langdetect/bg similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/bg rename to src/main/resources/langdetect/bg diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/bn b/src/main/resources/langdetect/bn similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/bn rename to src/main/resources/langdetect/bn diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/cs b/src/main/resources/langdetect/cs similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/cs rename to src/main/resources/langdetect/cs diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/da b/src/main/resources/langdetect/da similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/da rename to src/main/resources/langdetect/da diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/de b/src/main/resources/langdetect/de similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/de rename to src/main/resources/langdetect/de diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/el b/src/main/resources/langdetect/el similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/el rename to src/main/resources/langdetect/el diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/en b/src/main/resources/langdetect/en similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/en rename to src/main/resources/langdetect/en diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/es b/src/main/resources/langdetect/es similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/es rename to src/main/resources/langdetect/es diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/et b/src/main/resources/langdetect/et similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/et rename to src/main/resources/langdetect/et diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/fa b/src/main/resources/langdetect/fa similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/fa rename to src/main/resources/langdetect/fa diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/fi b/src/main/resources/langdetect/fi similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/fi rename to src/main/resources/langdetect/fi diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/fr b/src/main/resources/langdetect/fr similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/fr rename to src/main/resources/langdetect/fr diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/gu b/src/main/resources/langdetect/gu similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/gu rename to src/main/resources/langdetect/gu diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/he b/src/main/resources/langdetect/he similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/he rename to src/main/resources/langdetect/he diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/hi b/src/main/resources/langdetect/hi similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/hi rename to src/main/resources/langdetect/hi diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/hr b/src/main/resources/langdetect/hr similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/hr rename to src/main/resources/langdetect/hr diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/hu b/src/main/resources/langdetect/hu similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/hu rename to src/main/resources/langdetect/hu diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/id b/src/main/resources/langdetect/id similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/id rename to src/main/resources/langdetect/id diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/it b/src/main/resources/langdetect/it similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/it rename to src/main/resources/langdetect/it diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/ja b/src/main/resources/langdetect/ja similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/ja rename to src/main/resources/langdetect/ja diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/kn b/src/main/resources/langdetect/kn similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/kn rename to src/main/resources/langdetect/kn diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/ko b/src/main/resources/langdetect/ko similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/ko rename to src/main/resources/langdetect/ko diff --git a/src/main/resources/langdetect/language.json b/src/main/resources/langdetect/language.json new file mode 100644 index 0000000..8aa8442 --- /dev/null +++ b/src/main/resources/langdetect/language.json @@ -0,0 +1,182 @@ +{ + "aa" : "aa", + "ab" : "ab", + "ae" : "ae", + "af" : "af", + "ak" : "ak", + "ai" : "ai", + "al" : "al", + "am" : "am", + "an" : "an", + "ar" : "ar", + "as" : "as", + "av" : "av", + "ay" : "ay", + "az" : "az", + "ba" : "ba", + "be" : "be", + "bg" : "bg", + "bh" : "bh", + "bi" : "bi", + "bm" : "bm", + "bn" : "bn", + "bo" : "bo", + "br" : "br", + "bs" : "bs", + "ca" : "ca", + "ce" : "ce", + "ch" : "ch", + "co" : "co", + "cr" : "cr", + "cs" : "cs", + "cu" : "cu", + "cv" : "cv", + "cy" : "cy", + "da" : "da", + "de" : "de", + "dv" : "dv", + "dz" : "dz", + "el" : "el", + "en" : "en", + "eo" : "eo", + "es" : "es", + "et" : "et", + "eu" : "eu", + "ew" : "ew", + "fa" : "fa", + "fi" : "fi", + "fj" : "fj", + "fo" : "fo", + "fr" : "fr", + "fy" : "fy", + "ga" : "ga", + "gd" : "gd", + "gl" : "gl", + "gn" : "gn", + "gs" : "gs", + "gu" : "gu", + "gv" : "gv", + "ha" : "ha", + "hi" : "hi", + "ho" : "ho", + "hr" : "hr", + "ht" : "ht", + "hu" : "hu", + "hy" : "hy", + "hz" : "hz", + "ia" : "ia", + "id" : "id", + "ie" : "ie", + "ig" : "ig", + "ii" : "ii", + "ik" : "ik", + "is" : "is", + "it" : "it", + "iu" : "iu", + "ja" : "ja", + "jv" : "jv", + "ka" : "ka", + "kg" : "kg", + "ki" : "ki", + "kj" : "kj", + "kk" : "kk", + "kl" : "kl", + "km" : "km", + "kn" : "kn", + "ko" : "ko", + "kr" : "kr", + "ks" : "ks", + "ku" : "ku", + "kv" : "kv", + "kw" : "kw", + "ky" : "ky", + "la" : "la", + "lb" : "lb", + "lg" : "lg", + "li" : "li", + "ln" : "ln", + "lo" : "lo", + "lt" : "lt", + "lv" : "lv", + "mg" : "mg", + "mi" : "mi", + "mk" : "mk", + "ml" : "ml", + "mn" : "mn", + "mr" : "mr", + "ms" : "ms", + "mt" : "mt", + "my" : "my", + "na" : "na", + "nb" : "nb", + "nd" : "nd", + "ne" : "ne", + "ng" : "ng", + "nl" : "nl", + "no" : "no", + "nv" : "nv", + "ny" : "ny", + "oc" : "oc", + "oj" : "oj", + "om" : "om", + "or" : "or", + "os" : "os", + "pa" : "pa", + "pi" : "pi", + "pl" : "pl", + "ps" : "ps", + "pt" : "pt", + "qt" : "qt", + "qu" : "qu", + "rm" : "rm", + "rn" : "rn", + "ro" : "ro", + "ru" : "ru", + "rw" : "rw", + "sa" : "sa", + "sc" : "sc", + "sd" : "sd", + "sg" : "sg", + "si" : "si", + "sk" : "sk", + "sl" : "sl", + "sm" : "sm", + "sn" : "sn", + "so" : "so", + "sq" : "sq", + "sr" : "sr", + "ss" : "ss", + "st" : "st", + "su" : "su", + "sv" : "sv", + "sw" : "sw", + "ta" : "ta", + "te" : "te", + "tg" : "tg", + "th" : "th", + "ti" : "ti", + "tk" : "tk", + "tl" : "tl", + "tn" : "tn", + "to" : "to", + "tr" : "tr", + "ts" : "ts", + "tt" : "tt", + "tw" : "tw", + "ty" : "ty", + "ug" : "ug", + "uk" : "uk", + "ur" : "ur", + "uz" : "uz", + "ve" : "ve", + "vi" : "vi", + "vo" : "vo", + "wa" : "wa", + "wo" : "wo", + "xh" : "xh", + "yi" : "yi", + "za" : "za", + "zh-cn" : "zh-cn", + "zh-tw" : "zh-tw", + "zu" : "zu" +} \ No newline at end of file diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/lt b/src/main/resources/langdetect/lt similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/lt rename to src/main/resources/langdetect/lt diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/lv b/src/main/resources/langdetect/lv similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/lv rename to src/main/resources/langdetect/lv diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/mk b/src/main/resources/langdetect/mk similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/mk rename to src/main/resources/langdetect/mk diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/ml b/src/main/resources/langdetect/ml similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/ml rename to src/main/resources/langdetect/ml diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/mr b/src/main/resources/langdetect/mr similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/mr rename to src/main/resources/langdetect/mr diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/ne b/src/main/resources/langdetect/ne similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/ne rename to src/main/resources/langdetect/ne diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/nl b/src/main/resources/langdetect/nl similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/nl rename to src/main/resources/langdetect/nl diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/no b/src/main/resources/langdetect/no similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/no rename to src/main/resources/langdetect/no diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/pa b/src/main/resources/langdetect/pa similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/pa rename to src/main/resources/langdetect/pa diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/pl b/src/main/resources/langdetect/pl similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/pl rename to src/main/resources/langdetect/pl diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/pt b/src/main/resources/langdetect/pt similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/pt rename to src/main/resources/langdetect/pt diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/ro b/src/main/resources/langdetect/ro similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/ro rename to src/main/resources/langdetect/ro diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/ru b/src/main/resources/langdetect/ru similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/ru rename to src/main/resources/langdetect/ru diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/sk b/src/main/resources/langdetect/sk similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/sk rename to src/main/resources/langdetect/sk diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/sl b/src/main/resources/langdetect/sl similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/sl rename to src/main/resources/langdetect/sl diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/so b/src/main/resources/langdetect/so similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/so rename to src/main/resources/langdetect/so diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/sq b/src/main/resources/langdetect/sq similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/sq rename to src/main/resources/langdetect/sq diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/sv b/src/main/resources/langdetect/sv similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/sv rename to src/main/resources/langdetect/sv diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/sw b/src/main/resources/langdetect/sw similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/sw rename to src/main/resources/langdetect/sw diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/ta b/src/main/resources/langdetect/ta similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/ta rename to src/main/resources/langdetect/ta diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/te b/src/main/resources/langdetect/te similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/te rename to src/main/resources/langdetect/te diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/th b/src/main/resources/langdetect/th similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/th rename to src/main/resources/langdetect/th diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/tl b/src/main/resources/langdetect/tl similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/tl rename to src/main/resources/langdetect/tl diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/tr b/src/main/resources/langdetect/tr similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/tr rename to src/main/resources/langdetect/tr diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/uk b/src/main/resources/langdetect/uk similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/uk rename to src/main/resources/langdetect/uk diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/ur b/src/main/resources/langdetect/ur similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/ur rename to src/main/resources/langdetect/ur diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/vi b/src/main/resources/langdetect/vi similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/vi rename to src/main/resources/langdetect/vi diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/zh-cn b/src/main/resources/langdetect/zh-cn similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/zh-cn rename to src/main/resources/langdetect/zh-cn diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/zh-tw b/src/main/resources/langdetect/zh-tw similarity index 100% rename from src/main/resources/org/xbib/elasticsearch/common/langdetect/zh-tw rename to src/main/resources/langdetect/zh-tw diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/languages.properties b/src/main/resources/org/xbib/elasticsearch/common/langdetect/languages.properties deleted file mode 100644 index 313f075..0000000 --- a/src/main/resources/org/xbib/elasticsearch/common/langdetect/languages.properties +++ /dev/null @@ -1,53 +0,0 @@ -af -ar -bg -bn -cs -da -de -el -en -es -et -fa -fi -fr -gu -he -hi -hr -hu -id -it -ja -kn -ko -lt -lv -mk -ml -mr -ne -nl -no -pa -pl -pt -ro -ru -sk -sl -so -sq -sv -sw -ta -te -th -tl -tr -uk -ur -vi -zh-cn -zh-tw diff --git a/src/main/resources/org/xbib/elasticsearch/common/langdetect/messages.properties b/src/main/resources/org/xbib/elasticsearch/common/langdetect/messages.properties deleted file mode 100644 index b9574a0..0000000 --- a/src/main/resources/org/xbib/elasticsearch/common/langdetect/messages.properties +++ /dev/null @@ -1,128 +0,0 @@ -NGram.CJK_KANJI_EXCLUDE=\u0020\uFF08\uFF09 -NGram.LATIN1_EXCLUDE=\u00A0\u00AB\u00B0\u00BB -NGram.KANJI_1_0=\u4F7C\u6934 -NGram.KANJI_1_2=\u88CF\u95B2 -NGram.KANJI_1_4=\u7027\u7DCB -NGram.KANJI_1_8=\u4E80\u4E9C\u4EEE\u5263\u5264\u5270\u52C5\u52E7\u52F2\u53B3\u5449\u58CA\u58CC\u5968\u59C9\u59EB\u5D8B\u5DE3\u5E30\u6075\u622F\u623B\u6255\u629C\u629E\u62DD\u62E1\u633F\u635C\u63FA\u6442\u6589\u658E\u6669\u66A6\u66FD\u6804\u685C\u6B69\u6B6F\u6BBB\u6C37\u6C5A\u6D44\u6E09\u6E0B\u6E13\u6EDD\u713C\u72A0\u731F\u7363\u7A32\u7A42\u7A93\u7ADC\u7C8B\u7C9B\u7DD1\u7E01\u7E04\u7E26\u7E4A\u7E4B\u7E70\u8074\u8107\u8133\u81D3\u820E\u8217\u8358\u83D3\u85AC\u8987\u899A\u8B21\u8B72\u8B83\u8CDB\u9045\u90F7\u91C8\u9271\u9283\u92AD\u9665\u967A\u96A0\u96A3\u96B7\u970A\u983C\u9854\u9855\u99C6\u9A12\u9ED9\u9F62 -NGram.KANJI_1_11=\u67D8\u831C -NGram.KANJI_1_12=\u5742\u57FC\u5800 -NGram.KANJI_1_13=\u4E3C\u4E98\u4FE3\u4FF5\u5072\u51A8\u53A9\u5451\u546A\u5504\u5516\u55A9\u55B0\u5618\u5642\u565B\u567A\u56A2\u57F4\u5840\u5841\u58F1\u59F6\u5A2F\u5B22\u5B8D\u5DCC\u5EFB\u5F10\u60A9\u60E3\u61D0\u62F6\u63B4\u63BB\u63C3\u6681\u685F\u6955\u6962\u696F\u698A\u698E\u69FB\u6A2B\u6A7F\u6B53\u6BD8\u6D99\u6E07\u7460\u7473\u7560\u7573\u758E\u7690\u7815\u783A\u7962\u7A4F\u7A63\u7AEA\u7BED\u7CA7\u7D18\u7D3A\u7E4D\u8061\u8218\u8276\u82C5\u8597\u85AB\u86CD\u874B\u88FE\u8ACF\u8B90\u8D0B\u8FBF\u9013\u9061\u914E\u9154\u918D\u9190\u91A4\u91B8\u9262\u929A\u92ED\u92F3\u932C\u96EB\u96F0\u976D\u97EE\u981A\u99C4\u9A28\u9AC4\u9B8E\u9C10\u9D0E\u9D5C\u9D8F\u9E78\u9EB9\u9EBA\u9EBF -NGram.KANJI_1_14=\u5F66\u7984\u7985 -NGram.KANJI_1_16=\u5861\u7B25\u844E\u9419\u9D07 -NGram.KANJI_1_18=\u5039\u514E\u51E7\u51EA\u5301\u5302\u5859\u58F7\u59AC\u5C2D\u5CA8\u5EFC\u6357\u64B9\u67CA\u6802\u6834\u68BC\u6900\u6919\u691B\u69D9\u6AE8\u6D9C\u6E8C\u6F09\u6F45\u701E\u7026\u7114\u72DB\u7577\u75E9\u783F\u7895\u7A50\u7AC3\u7B48\u7B86\u7BAA\u7C7E\u7C82\u7C8D\u7CCE\u7D2C\u7F6B\u7FEB\u8557\u85AE\u86CE\u877F\u8997\u8ACC\u8CB0\u8CCE\u8FE9\u9197\u920E\u9266\u927E\u92F2\u9306\u9453\u9784\u982C\u9834\u99C8\u9BF5\u9C2F\u9D2C -NGram.KANJI_1_22=\u6762\u6A17\u887F -NGram.KANJI_1_27=\u4E21\u4E57\u4ECF\u4F1D\u4FA1\u4FF3\u5024\u50CD\u5150\u5186\u51E6\u52B4\u52B9\u5358\u53CE\u55B6\u56E3\u56F2\u56F3\u570F\u5727\u5869\u5897\u58F2\u5909\u5B9F\u5BDB\u5BFE\u5C02\u5DFB\u5E2F\u5E81\u5E83\u5EC3\u5F3E\u5F93\u5FB3\u5FB4\u5FDC\u60AA\u6226\u6238\u6271\u62E0\u6319\u63B2\u6483\u64AE\u67A0\u67FB\u691C\u697D\u69D8\u6A29\u6B73\u6B74\u6BCE\u6C17\u6CA2\u6D5C\u6E08\u6E80\u702C\u7523\u767A\u770C\u7D4C\u7D75\u7D76\u7D99\u7D9A\u7DCF\u8535\u8846\u89A7\u89B3\u8A33\u8AAC\u8AAD\u8C4A\u8EE2\u8EFD\u8FBA\u8FBC\u9244\u9332\u95A2\u95D8\u96D1\u99C5\u9A13\u9ED2 -NGram.KANJI_1_29=\u4F0E\u4FFA\u5036\u53E1\u54B2\u5506\u583A\u5C3B\u5CAC\u5CE0\u5CEF\u6803\u68B6\u6A0B\u6A8E\u73C2\u7551\u7826\u7881\u79B0\u7B39\u8429\u8599\u8FBB\u9162\u95C7\u9688\u96BC\u9AEA\u9DF2 -NGram.KANJI_1_31=\u5553\u938C -NGram.KANJI_1_35=\u51B4\u564C\u57DC\u5B2C\u6822\u685D\u690B\u6973\u6C93\u7511\u7887\u7A17\u83D6\u847A\u8494\u8526\u854E\u85C1\u86F8\u88B4\u93A7\u9B92\u9C39\u9C48\u9C52 -NGram.KANJI_2_0=\u4E2B\u4EC3\u4F09\u4F57\u4F6F\u4F70\u4FD1\u4FDA\u500C\u5043\u516E\u5189\u5241\u530D\u5310\u5412\u54AB\u54AF\u5514\u5556\u55B1\u561F\u573B\u586D\u587D\u58C5\u58D1\u5914\u5A62\u5A6A\u5AE6\u5B40\u5B5B\u5B70\u5BB8\u5CD2\u5D01\u5D34\u5E11\u5EA0\u5F0B\u5F2D\u5F87\u607F\u621B\u6221\u6289\u63A3\u6452\u646D\u64D8\u652B\u6600\u6631\u6641\u66F7\u6773\u67B8\u67DD\u67DE\u6829\u68FB\u69AD\u6A47\u6C10\u6C68\u6C74\u6C85\u6CD3\u6D31\u6D93\u6D94\u6DB8\u6DBF\u6DC5\u6E6E\u6EA7\u6EB4\u6EC2\u6F2A\u6F2F\u6FB9\u6FC2\u6FDB\u6FEE\u70AF\u70FD\u7166\u726F\u729B\u739F\u73DE\u740A\u746D\u749C\u749F\u74E0\u759D\u75A3\u75CD\u75DE\u7600\u7620\u7688\u7738\u7762\u776B\u777D\u77E3\u781D\u7837\u78A3\u7946\u7B60\u7F44\u7F54\u7F5F\u7FAF\u8026\u807F\u80C4\u80DB\u80ED\u81E7\u824B\u82B7\u82E3\u8392\u846D\u84D3\u8548\u85B9\u86DE\u873F\u8753\u8782\u87AB\u87B3\u87D1\u87E0\u87FE\u8821\u88D8\u88E8\u8913\u891A\u892B\u8983\u8C3F\u8C49\u8C82\u8D6D\u8DE4\u8E1D\u8E1E\u8E7C\u8FE5\u8FE8\u9005\u9035\u9050\u9082\u9083\u9095\u90E2\u911E\u91AE\u91B4\u93D6\u9621\u968D\u96B9\u96D2\u9711\u9713\u973E\u9AB0\u9AB7\u9AE6\u9B03\u9B23\u9EDC\u9EEF -NGram.KANJI_2_1=\u4E82\u4F48\u4F54\u50F9\u5167\u528D\u52DE\u532F\u537B\u53C3\u5433\u555F\u55AE\u56B4\u570D\u5716\u58D3\u58DE\u5920\u5967\u5A1B\u5BEB\u5BEC\u5C08\u5C0D\u5C46\u5C6C\u5CFD\u5E36\u5E6B\u5EC8\u5EF3\u5F48\u5F91\u5F9E\u5FB5\u6046\u60E1\u61F7\u6232\u6236\u64C7\u64CA\u64D4\u64DA\u64F4\u651D\u6578\u65B7\u6649\u6A13\u6A23\u6A6B\u6A94\u6AA2\u6B0A\u6B50\u6B61\u6B72\u6B77\u6B78\u6C92\u6EAB\u6EFF\u6FD5\u6FDF\u71DF\u722D\u72C0\u734E\u737B\u746A\u7522\u773E\u78BC\u7A69\u7C3D\u7CB5\u7D55\u7D72\u7DA0\u7DAB\u7DE3\u7E5E\u7E6A\u7E7C\u7E8C\u8072\u807D\u8085\u812B\u8166\u8173\u81D8\u8209\u820A\u8332\u838A\u840A\u85E5\u860B\u8655\u865B\u88DD\u89BA\u89BD\u89C0\u8AAA\u8B6F\u8B7D\u8B8A\u8B93\u8C50\u8CF4\u8E64\u8F15\u8F49\u8FA6\u8FAD\u9109\u9130\u91AB\u91CB\u92B7\u9304\u9322\u95CA\u96A8\u96AA\u96B1\u96B8\u96D6\u96D9\u96DC\u9748\u975C\u986F\u9918\u99DB\u9A57\u9B25\u9EA5\u9EC3\u9EDE\u9F52 -NGram.KANJI_2_4=\u514C\u51AA\u5614\u56AE\u56C2\u582F\u58FA\u5B0C\u5D11\u5DD2\u5DD6\u5E40\u5E5F\u5EEC\u6137\u6417\u6488\u64F2\u652A\u6582\u6689\u689F\u68D7\u69D3\u6A97\u6AB8\u6ABB\u6AC3\u6ADA\u6B7F\u6BB2\u6EA5\u6EC4\u6EF2\u7009\u701D\u7028\u703E\u7165\u71BE\u721B\u7463\u7464\u7469\u7515\u7526\u75FA\u7621\u779E\u79B1\u7A1F\u7AC4\u7AC7\u7B8F\u7BE9\u7D2E\u7D68\u7D8F\u7DB8\u7DBA\u7E46\u7E79\u7F4C\u7F88\u8070\u8073\u8076\u81BE\u82BB\u83A2\u858A\u8591\u861A\u8778\u87EC\u8805\u880D\u893B\u8A1B\u8A25\u8A36\u8A85\u8AA6\u8B17\u8B28\u8CB6\u8CE4\u8D16\u8D1B\u8ECB\u9112\u9214\u9249\u93AC\u9594\u9598\u95BB\u95D5\u965E\u96B4\u97DC\u9821\u9824\u9921\u9952\u9A55\u9A5B\u9B1A\u9C13\u9D09\u9DAF\u9E1A\u9E75\u9F67 -NGram.KANJI_2_9=\u4E9F\u4F6C\u4FDE\u4FFE\u5029\u5140\u51A2\u5345\u539D\u53FB\u54C7\u5599\u560E\u561B\u563B\u566C\u5676\u5729\u574D\u57E4\u595A\u598D\u5A1F\u5A25\u5A77\u5AB2\u5AD6\u5BF0\u5C2C\u5CEA\u5E37\u5F08\u6059\u606A\u6096\u609A\u62A8\u6555\u6556\u66E6\u675E\u68E3\u69BB\u6BCB\u6BD3\u6C1F\u6C26\u6C81\u6DC4\u6DDE\u6E32\u6E44\u6E4D\u6F33\u6F7C\u6FA7\u701A\u701B\u715C\u741B\u7428\u7480\u74A8\u7504\u752C\u768B\u76CE\u78CA\u78FA\u79BA\u7C27\u8046\u81FB\u8331\u8393\u83C1\u8403\u8438\u843C\u8446\u85B0\u87D2\u8862\u8DC6\u9074\u9131\u9672\u96EF\u9704\u9706\u977C\u9ABC\u9E92\u9ECF -NGram.KANJI_2_10=\u51BD\u5704\u7350\u73A5 -NGram.KANJI_2_11=\u4E15\u4EA2\u4F5A\u50D6\u5349\u53DF\u5484\u5958\u5B34\u5B5A\u5C91\u5E1B\u5F77\u61CB\u61FF\u620C\u620D\u622E\u6248\u6538\u660A\u664F\u678B\u67E9\u69B7\u69C3\u6CB1\u6CD7\u6D5A\u6DAA\u6DC7\u7099\u71EE\u7325\u7425\u7455\u747E\u749E\u75B5\u7678\u7693\u76C2\u77B0\u77BF\u78CB\u7957\u795A\u797A\u7A79\u7B08\u7B75\u7BB4\u7F9A\u7FB2\u7FDF\u80E5\u81BA\u8340\u837C\u8398\u8559\u85A8\u86DF\u8734\u8882\u88F4\u8936\u900D\u907D\u9642\u96C9\u9AFB\u9E9D\u9EBE -NGram.KANJI_2_12=\u5F57\u7940 -NGram.KANJI_2_13=\u5191\u7791\u792C\u7D46 -NGram.KANJI_2_15=\u5713\u58FD\u5D17\u5D19\u5DBC\u5F4C\u6191\u64A5\u687F\u69AE\u6AFB\u6EEC\u6F3F\u6FE4\u6FF1\u6FFE\u700B\u74CA\u76E1\u76E7\u7926\u792B\u79AE\u7AA9\u7C43\u7C4C\u7C64\u7DBD\u81A0\u856D\u8594\u8606\u8A62\u8AF7\u8CC8\u8CE3\u8D99\u8F1B\u8F3B\u9059\u9127\u9264\u947D\u95A9\u97CB\u980C\u9838\u9846\u99AE\u9A19\u9B06\u9B91\u9F4A\u9F4B -NGram.KANJI_2_16=\u4E69\u4EC4\u4EDF\u4EF3\u4F0B\u4F5E\u5000\u5028\u50E5\u513B\u5157\u51DC\u52D7\u530F\u5379\u53F5\u5471\u5477\u5555\u555C\u557B\u5594\u55B2\u55C9\u560D\u5616\u562E\u5630\u5653\u5657\u566F\u56A8\u56B6\u5820\u5880\u58CE\u58D9\u5950\u5969\u596D\u599E\u59B3\u59CD\u59D2\u5A40\u5AA7\u5ABC\u5AD7\u5AD8\u5B0B\u5B24\u5B38\u5B53\u5C5C\u5D06\u5D47\u5D94\u5D9D\u5E57\u5EC4\u5F46\u5FAC\u60BD\u60D8\u6123\u615D\u615F\u6175\u618A\u61AB\u61E3\u623E\u6308\u636B\u645F\u6519\u6595\u6698\u66B8\u67D9\u6840\u695D\u696E\u6979\u69C1\u69E8\u6AEC\u6AFA\u6B5F\u6CAC\u6CE0\u6CEF\u6D0C\u6D36\u6DD2\u6DD9\u6DE6\u6DEC\u6E5F\u6FA0\u6FEC\u7156\u71C4\u71DC\u71EC\u71FC\u720D\u7230\u7292\u7296\u72A2\u72CE\u7357\u737A\u7380\u7386\u73A8\u73EE\u743F\u74A6\u74CF\u74D4\u74DA\u755A\u75A5\u75B3\u75C2\u75E0\u75F1\u75FF\u7601\u7609\u7646\u7658\u769A\u76B0\u774F\u775C\u778B\u77BD\u77C7\u7843\u787F\u78F4\u79C8\u7A88\u7A95\u7AFD\u7B1E\u7B67\u7B9D\u7BCC\u7C0D\u7C11\u7C37\u7C40\u7C6E\u7CB3\u7CBD\u7D09\u7D31\u7D40\u7D5B\u7D70\u7D91\u7D9E\u7DB0\u7DD9\u7DF9\u7E08\u7E11\u7E1D\u7E35\u7E52\u7FB6\u7FBF\u7FEE\u8012\u801C\u8028\u8052\u8123\u8188\u81C3\u81DA\u81FE\u8210\u82BE\u83A0\u83D4\u8407\u8435\u8477\u849E\u84C6\u84CA\u85F9\u867A\u86B5\u86B6\u86C4\u8706\u8707\u870A\u8768\u87BB\u8831\u8839\u8879\u8921\u8938\u8964\u89A6\u89AC\u8A10\u8A3E\u8AC2\u8ADB\u8AF3\u8B2B\u8B41\u8B4E\u8B5F\u8B6B\u8B92\u8C55\u8C62\u8C73\u8C8A\u8C8D\u8CB2\u8CB3\u8CD2\u8CE1\u8CFB\u8D0D\u8E34\u8E7A\u8E8A\u8ED4\u8EFE\u8F0A\u8F1C\u8F1E\u8F26\u8FAE\u9088\u90C3\u90FE\u9134\u9148\u91D9\u91E9\u9238\u9239\u923D\u924D\u925A\u9296\u92AC\u92BB\u9315\u9319\u931A\u9321\u9370\u9394\u93A2\u93D8\u93E4\u943A\u9477\u9582\u958E\u95A1\u95C8\u95CC\u95D4\u9658\u966C\u970F\u973D\u9744\u975B\u9766\u97A3\u97A6\u97C1\u97C6\u980A\u9837\u9853\u9870\u98AF\u98B3\u98BA\u98E9\u98ED\u9912\u991B\u991E\u993D\u993F\u99D1\u99DF\u9A01\u9A3E\u9A43\u9A4D\u9ACF\u9AE1\u9B22\u9B58\u9C25\u9C3E\u9C54\u9C56\u9D15\u9D23\u9D89\u9DC2\u9DD3\u9E82\u9E8B\u9EA9\u9EE0\u9EF7\u9F07\u9F2F\u9F34\u9F3E\u9F5F\u9F6C -NGram.KANJI_2_18=\u5155\u520E\u55DF\u56C0\u56C1\u5793\u5FD6\u5FF8\u6029\u60FA\u613E\u6147\u615A\u62C8\u6384\u6883\u6894\u68F9\u6AA3\u6AAE\u6AC2\u6E63\u7032\u70A4\u7146\u71FB\u7228\u72F7\u7370\u7441\u74BF\u75B8\u75E3\u7622\u76CD\u7768\u79E3\u7A60\u7B6E\u7BC1\u7C5F\u7D06\u7E2F\u7E39\u8146\u81CF\u8703\u8729\u8737\u87EF\u88D2\u8A22\u8AC4\u8AF6\u8E59\u8F33\u8F42\u9169\u91B1\u9278\u93C3\u93DD\u9460\u946A\u9785\u9AD1\u9B4D\u9B4E\u9C31\u9D12\u9ECC -NGram.KANJI_2_21=\u502A\u544E\u59AE\u59EC\u5D1B\u66A8\u6BD7\u6C76\u6E1D\u70EF\u742A\u7459\u7FE1\u82EF\u8343\u85C9\u8A79\u90DD -NGram.KANJI_2_22=\u4EDE\u4F7B\u504C\u50EE\u52E3\u52F0\u536E\u54A9\u54BB\u54BF\u54C2\u54E6\u550F\u556A\u55E8\u564E\u5664\u5671\u568F\u56DD\u572F\u57A0\u5809\u5924\u59A3\u59A4\u59E3\u5A13\u5A23\u5B51\u5B73\u5C50\u5C8C\u6035\u60C6\u6106\u6215\u62CE\u62FD\u64ED\u6549\u6554\u655D\u659B\u65CE\u65D6\u6615\u6624\u665E\u6677\u669D\u66E9\u6772\u677C\u696B\u6A84\u6AA0\u6BFD\u6C16\u6C86\u6C94\u6CD6\u6D2E\u6D39\u6F78\u6FB6\u705E\u70CA\u7168\u723B\u7256\u7284\u73B3\u740D\u742F\u7498\u74A9\u752D\u75F3\u7634\u768E\u76B4\u76E5\u77A0\u77DC\u781F\u782D\u7AA0\u7BFE\u7FF1\u80AB\u8174\u81EC\u8202\u8222\u8228\u82DC\u8306\u83FD\u8469\u84FF\u859C\u8617\u86B1\u8722\u8C89\u8D67\u8DCE\u8E49\u8E76\u8E87\u8FE2\u8FE4\u8FF8\u9016\u905B\u9174\u982B\u98E7\u9955\u9B32 -NGram.KANJI_2_23=\u4F8F\u5055\u524C\u548E\u5583\u594E\u5CB7\u5ED6\u5F5D\u6021\u66B9\u66F0\u6C55\u6C7E\u6C82\u6E2D\u6EC7\u6ED5\u70B3\u71B9\u72C4\u73C0\u7426\u745C\u748B\u7696\u777F\u79A7\u79B9\u7F8C\u8153\u8339\u8386\u8725\u90B5\u9102\u962E\u9716\u97F6 -NGram.KANJI_2_28=\u5733\u57D4\u838E\u8FEA -NGram.KANJI_2_29=\u50ED\u5F29\u62EE\u6A9C\u7BC6\u80F1\u8129\u8171\u822B\u8AEB -NGram.KANJI_2_30=\u4EB3\u4F15\u4FB7\u5006\u509A\u50A2\u5102\u5109\u5115\u5137\u5138\u513C\u524B\u524E\u5277\u528A\u52E6\u52FB\u5331\u5436\u5443\u54FD\u5538\u555E\u55C6\u55C7\u5679\u5690\u5695\u56C9\u56D1\u56EA\u588A\u58E2\u5AFB\u5B2A\u5B43\u5B7F\u5BE2\u5C37\u5D27\u5D84\u5D87\u5DD4\u5EC1\u5EDD\u5F12\u5FA0\u60F1\u616B\u61F5\u61F6\u61FE\u62DA\u6371\u6399\u63C0\u6451\u647B\u6493\u64BB\u64BF\u64C4\u64F1\u64F7\u650F\u652C\u665D\u6684\u6688\u66EC\u672E\u68E7\u69A6\u69ED\u69F3\u6A01\u6AAF\u6AE5\u6BA4\u6BAE\u6BAF\u6BC6\u6C08\u6C2C\u6C59\u6D87\u6EBC\u6ECC\u6EF7\u6F6F\u6F80\u6F86\u6FD8\u6FF0\u6FFA\u7006\u7018\u7030\u7051\u7192\u71C9\u71D9\u71F4\u71FE\u7274\u7377\u74A3\u750C\u7613\u7627\u7661\u7662\u7665\u766E\u7671\u7672\u76BA\u775E\u776A\u778C\u78E7\u7955\u7A08\u7AC5\u7B4D\u7C2B\u7C6C\u7CF0\u7D02\u7D1C\u7D73\u7DA2\u7DB5\u7DDE\u7E09\u7E0A\u7E37\u7E43\u7E61\u7E7D\u7E93\u7F3D\u7FF9\u81A9\u8271\u83F8\u84C0\u8514\u85BA\u86A9\u86FB\u879E\u8814\u8836\u889E\u8932\u896A\u896F\u8993\u89B2\u8A15\u8A16\u8A1D\u8A5B\u8A6C\u8A6D\u8A7C\u8AA1\u8AA3\u8AA5\u8B0A\u8B4F\u8B59\u8B96\u8C48\u8C54\u8CBD\u8CFA\u8D13\u8E89\u8E8B\u8EAA\u8EC0\u8EDB\u8EFC\u8F12\u8F1F\u8F3E\u8F45\u8FFA\u9015\u9183\u919E\u91A3\u91D7\u91F5\u9209\u9215\u923E\u9240\u9251\u9257\u927B\u9293\u92A8\u92C5\u92C7\u92F0\u9333\u935A\u9382\u938A\u9398\u93B3\u93D7\u93DF\u93E2\u93FD\u942B\u942E\u9433\u9463\u9470\u9472\u947E\u95D0\u96CB\u97C3\u97CC\u981C\u9839\u986B\u98B6\u98EA\u9909\u991A\u9935\u993E\u9951\u99A5\u99B1\u99D9\u99DD\u99F1\u9A2B\u9A62\u9A65\u9AAF\u9AD2\u9AEF\u9B0D\u9B28\u9B77\u9BFD\u9C49\u9C5F\u9C78\u9D3F\u9D72\u9DD7\u9E1B\u9EB4\u9EF4\u9F66\u9F94 -NGram.KANJI_2_31=\u5DBD\u63C6\u6E3E\u7587\u8AF1\u8B5A\u9695 -NGram.KANJI_2_32=\u53A5\u589F\u5CD9\u7109\u7F79\u8006\u8654\u8944\u968B\u96CD -NGram.KANJI_2_35=\u4F47\u4F91\u4FCE\u4FDF\u527D\u535E\u55DA\u56A5\u5879\u5A11\u5B7A\u5CAB\u5CF4\u5EBE\u5F7F\u5FA8\u601B\u606B\u60B8\u610D\u6134\u619A\u61FA\u6369\u6523\u65CC\u66C4\u6727\u6968\u6A05\u6A48\u6B59\u6BEC\u6D35\u6D38\u6E19\u701F\u7064\u711C\u716C\u71A8\u71E7\u7258\u743A\u746F\u75BD\u75D9\u75F2\u7669\u766C\u76DE\u7729\u77BC\u78EC\u792A\u7A37\u7A62\u7BE6\u7C2A\u7C50\u7D07\u7DD8\u7E5A\u7F8B\u7FD5\u7FF3\u8151\u81CD\u8317\u83F4\u85EA\u85FA\u8823\u895E\u89F4\u8A0C\u8A41\u8AA8\u8ACD\u8B10\u8CC1\u8D05\u8D73\u8E4A\u8E85\u8E91\u8EFB\u8F13\u9087\u914A\u91C9\u923F\u93B0\u9403\u95A8\u95AD\u9730\u9865\u9903\u9945\u9949\u99AD\u99E2\u9A6A\u9D26\u9E1E\u9EDD\u9F2C\u9F72 -NGram.KANJI_2_36=\u4E9E\u4F86\u5011\u50B3\u5152\u5169\u5340\u5718\u5B78\u5BE6\u5BF6\u5C07\u5EE3\u61C9\u6230\u6703\u689D\u6A02\u6C23\u7063\u7368\u756B\u7576\u767C\u7A31\u7D93\u7E23\u7E3D\u81FA\u8207\u842C\u85DD\u865F\u8B49\u8B80\u8CFD\u908A\u9435\u95DC\u965D\u9AD4\u9EE8 -NGram.KANJI_2_37=\u5480\u5580\u5C39\u67EF\u68B5\u6D85\u8521\u90B1 -NGram.KANJI_2_38=\u4E1F\u4F96\u4FE0\u50F1\u5118\u522A\u5291\u52C1\u52DB\u52F3\u52F5\u52F8\u53B2\u55CE\u562F\u580A\u5862\u58AE\u58D8\u58DF\u58E9\u58EF\u5925\u593E\u599D\u5ABD\u5C62\u5EC2\u5EDA\u5EE2\u5F4E\u5F65\u6085\u6158\u61FC\u6200\u62CB\u633E\u6416\u6436\u6490\u64CB\u64E0\u64FA\u6514\u651C\u6524\u6558\u6583\u66B1\u66C6\u66C9\u66E0\u6A11\u6A1E\u6A38\u6A62\u6AB3\u6B16\u6B98\u6BBC\u6C2B\u6DDA\u6DE8\u6DEA\u6DFA\u6EEF\u6EFE\u6F32\u6F51\u6F5B\u700F\u71D2\u7210\u7246\u7260\u72A7\u72F9\u7375\u7378\u758A\u760B\u76DC\u76EA\u77DA\u77FD\u78DA\u7919\u797F\u79AA\u7A05\u7A4C\u7ACA\u7C72\u7D81\u7DDD\u7E31\u7E69\u7E6B\u7E73\u7E96\u7E9C\u81BD\u81C9\u81DF\u8259\u8277\u8396\u83A7\u8523\u8525\u860A\u863F\u8667\u87A2\u87F2\u881F\u883B\u89F8\u8B20\u8B74\u8B9A\u8C4E\u8C6C\u8C93\u8CEC\u8D0A\u8D0F\u8D95\u8E10\u8F4E\u8FAF\u8FF4\u905E\u9072\u9081\u908F\u91AC\u91C0\u91C1\u91D0\u921E\u9223\u9245\u929C\u92B3\u92C1\u9336\u934A\u93C8\u9444\u9452\u947C\u947F\u9592\u95B1\u95C6\u95D6\u95E1\u95E2\u96DE\u9742\u978F\u984F\u9871\u98B1\u98C4\u99ED\u9A37\u9A45\u9A5F\u9AEE\u9B27\u9BCA\u9C77\u9D51\u9D5D\u9E79\u9E7C\u9E7D\u9EB5\u9EBC\u9F61\u9F63\u9F90\u9F9C -NGram.KANJI_3_1=\u5283\u7562\u7DEC\u88E1\u8F2F -NGram.KANJI_3_2=\u5009\u502B\u5049\u5075\u507D\u5091\u5098\u50B5\u50B7\u50BE\u5100\u5104\u511F\u518A\u525B\u5289\u5442\u5805\u589C\u58C7\u5922\u596A\u5A66\u5B6B\u5BE7\u5BE9\u5DBA\u5E63\u5E7E\u5FB9\u6163\u616E\u6176\u61B2\u61B6\u61F8\u639B\u63DA\u63EE\u640D\u64B2\u64C1\u64EC\u6557\u6575\u6607\u66AB\u68C4\u6A39\u6C96\u6CC1\u6E1B\u6E6F\u6E9D\u6EC5\u6F01\u6F64\u6FC3\u7058\u707D\u7344\u7642\u76E4\u7832\u790E\u7B46\u7D05\u7D0B\u7D14\u7D19\u7D1B\u7D39\u7D61\u7DB1\u7DCA\u7DD2\u7DE0\u7DE9\u7DEF\u7DF4\u7E2E\u7E3E\u8105\u8108\u81E8\u8266\u84CB\u84EE\u85A9\u885D\u88DC\u8972\u8A02\u8A0E\u8A13\u8A17\u8A2A\u8A34\u8A3A\u8A3C\u8A69\u8A73\u8A95\u8AA0\u8AA4\u8AB2\u8AC7\u8ACB\u8B00\u8B1B\u8B1D\u8B5C\u8C9D\u8C9E\u8CA2\u8CA8\u8CA9\u8CAB\u8CAC\u8CB7\u8CBF\u8CC0\u8CDE\u8CE2\u8CFC\u8D08\u8DE1\u8E8D\u8ECC\u8EDF\u8EF8\u8F14\u8F1D\u8F2A\u8F44\u9055\u9069\u9077\u907C\u90F5\u91DD\u9285\u92FC\u9326\u932F\u9375\u9396\u93AE\u93E1\u9451\u9589\u95A3\u9663\u9670\u9673\u96BB\u9801\u9802\u9803\u9806\u9808\u9810\u983B\u984D\u9858\u9867\u98EF\u98F2\u98FE\u990A\u99D0\u9A0E\u9A5A\u9B5A\u9CE5\u9DB4\u9E97\u9F8D -NGram.KANJI_3_3=\u543E\u5BEE\u5F18\u6590\u725F\u83C5\u85E9\u9E93 -NGram.KANJI_3_4=\u5016\u53AD\u5606\u5629\u58BE\u5F14\u6065\u6144\u646F\u647A\u67F5\u6953\u6C3E\u6F2C\u6F97\u6FB1\u7169\u71E6\u71ED\u74BD\u79BF\u7A1C\u7A4E\u7AAF\u7CDE\u7D17\u7D43\u7E55\u7FA8\u807E\u8139\u8490\u8569\u856A\u87FB\u8A23\u8AB9\u8AE6\u8AFA\u8B2C\u8CD1\u91D8\u92F8\u9318\u96DB\u99B4\u9BC9\u9C2D\u9CF6\u9D61\u9DFA -NGram.KANJI_3_5=\u4E26\u4F75\u4FC2\u500B\u5074\u5099\u512A\u5225\u5247\u5275\u5287\u52D5\u52D9\u52DD\u52E2\u5354\u54E1\u554F\u5712\u57F7\u5831\u5834\u5BAE\u5C0E\u5C64\u5CA1\u5CF6\u5E2B\u5E79\u5EAB\u5F35\u5F37\u5F8C\u5FA9\u611B\u614B\u63A1\u63DB\u6642\u66F8\u6771\u696D\u6975\u69CB\u6A19\u6A4B\u6A5F\u6BBA\u6C7A\u6E2C\u6E96\u6F22\u70BA\u7121\u71B1\u7372\u73FE\u74B0\u7570\u76E3\u78BA\u7A2E\u7A4D\u7AF6\u7BC0\u7BC4\u7BC9\u7C21\u7D00\u7D04\u7D0D\u7D1A\u7D30\u7D42\u7D44\u7D50\u7D66\u7D71\u7DAD\u7DDA\u7DE8\u7E54\u7F85\u7FA9\u7FD2\u8056\u805E\u8077\u8208\u83EF\u8449\u8853\u885B\u88FD\u8907\u898B\u898F\u8996\u89AA\u8A08\u8A18\u8A2D\u8A31\u8A55\u8A5E\u8A66\u8A71\u8A72\u8A8C\u8A8D\u8A9E\u8ABF\u8AD6\u8AF8\u8B58\u8B70\u8B77\u8CA0\u8CA1\u8CB4\u8CBB\u8CC7\u8CEA\u8ECA\u8ECD\u8F03\u8F09\u8F38\u8FB2\u9023\u9031\u9032\u904A\u904B\u904E\u9054\u9060\u9078\u907A\u9084\u9280\u9577\u9580\u958B\u9593\u9678\u967D\u968A\u968E\u969B\u96E2\u96E3\u96F2\u96FB\u97D3\u97FF\u9805\u9818\u982D\u984C\u985E\u98A8\u98DB\u9928\u99AC\u9BAE -NGram.KANJI_3_8=\u5F6B\u6C4E\u7B87\u8A70 -NGram.KANJI_3_9=\u540B\u5B5C\u826E -NGram.KANJI_3_11=\u4F83\u4FF8\u51CB\u52BE\u53F1\u548B\u558B\u5CB1\u5D69\u5F3C\u620E\u621F\u64E2\u67DA\u6854\u69CC\u6A35\u6C8C\u6E1A\u6F15\u6FE0\u717D\u7252\u7AFA\u82D3\u83DF\u8431\u9041\u9149\u9798 -NGram.KANJI_3_12=\u4ED5\u55E3\u572D\u57A3\u587E\u5983\u5A9B\u5C90\u5E61\u672D\u6960\u6F5F\u72D9\u72E9\u757F\u7949\u7950\u7E82\u7FCC\u82B8\u90B8\u91DC\u961C\u9B45 -NGram.KANJI_3_13=\u55AB\u6249\u643E\u6841\u68B1\u725D\u7B8B\u7C95\u7E1E\u7F36\u8A03\u8A6B\u8E74\u95A4 -NGram.KANJI_3_15=\u50AD\u50D1\u5132\u51F1\u55AC\u5617\u5687\u584A\u59EA\u5B30\u5BF5\u5C0B\u5C4D\u5EDF\u6182\u61A4\u64AB\u64FE\u66A2\u6897\u694A\u69CD\u6B3D\u6BC0\u6D29\u6F38\u7015\u7149\u71C8\u723A\u7336\u7345\u755D\u76C3\u78A9\u798D\u7AAE\u7DFB\u7E2B\u7F75\u7F77\u81E5\u834A\u852D\u85CD\u8755\u8A3B\u8A54\u8AE7\u8B02\u8B39\u8CAA\u8CE6\u8DA8\u8E5F\u8F5F\u905C\u912D\u919C\u92D2\u932B\u937E\u9418\u9583\u9812\u985B\u9905\u99B3\u99C1\u99D5\u9A30\u9CF3\u9D3B\u9D6C -NGram.KANJI_3_16=\u6D6C\u72FD\u77A5\u8956\u9C0D -NGram.KANJI_3_18=\u5919\u5F4A\u6063\u63AC\u649A\u6715\u6AD3\u71D0\u758B\u834F\u85F7\u88DF\u8F61\u93D1\u98F4\u9D60 -NGram.KANJI_3_19=\u4F50\u7DB2\u962A -NGram.KANJI_3_22=\u5E96\u75D4\u91C6 -NGram.KANJI_3_23=\u5E9A\u6C40\u821C\u839E\u8FED\u9EDB -NGram.KANJI_3_27=\u5F01\u66DC -NGram.KANJI_3_29=\u5023\u5208\u531D\u536F\u53E9\u54C9\u598A\u59BE\u5A20\u5D6F\u5DF3\u66C7\u66D6\u66F3\u6775\u6A3D\u6ADB\u6B86\u6C72\u6E25\u73EA\u7435\u760D\u7656\u7825\u78D0\u7A14\u7A6B\u7B20\u7BE0\u7CF8\u7DAC\u7DBB\u7DBE\u80E4\u80F4\u837B\u8466\u8568\u867B\u8A63\u91E7\u9320\u935B\u9591\u965B\u98E2\u990C\u9913\u9BAB -NGram.KANJI_3_30=\u60B6\u8AD2\u8CC2\u9237\u9328\u934D\u9397\u9830 -NGram.KANJI_3_31=\u4FB6\u50D5\u51CD\u559A\u55AA\u5674\u5857\u585A\u5875\u58B3\u596E\u59E6\u5A41\u5D50\u5E25\u5E33\u5F59\u61C7\u61F2\u6368\u6383\u65AC\u68DF\u68F2\u6A3A\u6B04\u6DBC\u6DF5\u6E26\u6E4A\u6E67\u6F54\u6F70\u6FC1\u6FEB\u7159\u727D\u7652\u77EF\u78EF\u798E\u7A40\u7AAA\u7BE4\u7C60\u7CE7\u7CFE\u7D21\u7D33\u7D5E\u7D79\u7DB4\u7DBF\u7E1B\u7E8F\u7F70\u814E\u816B\u8178\u819A\u84BC\u85A6\u865C\u8766\u8A1F\u8A50\u8A60\u8A6E\u8A87\u8A98\u8AB0\u8ADC\u8AED\u8AEE\u8B0E\u8B19\u8CA7\u8CAF\u8CB8\u8CBC\u8CC3\u8CC4\u8CCA\u8CDC\u8CE0\u8CED\u8ED2\u8F29\u8F3F\u91E3\u920D\u9234\u925B\u9298\u9310\u934B\u958F\u95A5\u9727\u97FB\u9811\u984E\u98FC\u98FD\u99D2\u99FF\u9B31\u9BE8\u9C57\u9CE9\u9CF4\u9D28\u9DF9 -NGram.KANJI_3_32=\u4E1E\u502D\u51A5\u5321\u58EC\u5A3C\u5BC5\u5CE8\u61A9\u620A\u65A1\u6714\u6853\u6893\u6C50\u6C5D\u7436\u745A\u745B\u773A\u7941\u7947\u8543\u865E\u8C5A\u914B\u99A8\u9AB8 -NGram.KANJI_3_35=\u4E99\u5BA5\u5DFD\u608C\u60C7\u60DA\u6190\u61A7\u6753\u6777\u6787\u6B4E\u6F23\u6FE1\u6FEF\u7337\u7827\u786F\u7893\u7ABA\u7B94\u7BB8\u7C3E\u7D62\u7E6D\u80B1\u81BF\u81C6\u821B\u82E7\u83F0\u84D1\u86ED\u8888\u8B01\u8B04\u8F4D\u9291\u92E4\u932E\u9354\u936C\u939A\u9957\u9AED\u9BAA\u9BAD\u9BD6\u9BDB\u9C3B\u9D1B -NGram.KANJI_3_36=\u50C5\u53E2\u5EE0\u65BC\u70CF\u723E\u7D10\u7D9C\u806F\u8607\u862D\u8A0A\u8AFE\u8CD3\u9019\u9813\u9B6F -NGram.KANJI_3_37=\u4EA8\u4F3D\u5384\u5EFF\u60DF\u66DD\u6E5B\u8087\u82D1\u8FE6\u9640\u9E9F -NGram.KANJI_3_38=\u5147\u525D\u5678\u617E\u6372\u79A6\u8ABC\u92EA\u9438\u9817 -NGram.KANJI_4_0=\u6D3C\u718F\u74EE\u8712 -NGram.KANJI_4_9=\u4F84\u54C6\u5565\u68F1\u6D82\u83C7 -NGram.KANJI_4_10=\u4FE9\u4FED\u51FF\u523D\u5300\u5364\u538C\u5450\u5455\u545C\u54D1\u54D7\u5578\u56A3\u58F6\u592F\u5CE6\u5D2D\u5E90\u6073\u607C\u60EB\u61D2\u62E2\u62E3\u631A\u6320\u6323\u6361\u63B7\u63B8\u63BA\u6405\u65A9\u65F7\u6619\u6655\u67A3\u67E0\u6805\u6808\u6866\u6868\u6869\u6A71\u6BE1\u6C79\u6CA5\u6CDE\u6DA4\u6DA7\u6DA9\u6E85\u70DB\u70E6\u70EB\u7115\u724D\u7410\u759F\u75AE\u75EA\u75F9\u762B\u763E\u76B1\u77EB\u783E\u79C3\u7A8D\u7A9C\u7B5D\u7BF1\u7EC5\u7ED2\u7EDE\u7EE3\u7EF7\u7EF8\u7EFD\u7F00\u7F0E\u7F15\u7F1A\u7F20\u7F24\u7F28\u7FA1\u7FD8\u8038\u803B\u804B\u80AE\u817B\u82C7\u8327\u835E\u8367\u83BA\u8424\u864F\u8681\u8682\u8715\u8717\u8721\u8747\u874E\u8845\u886C\u889C\u88E4\u89C5\u8BB6\u8BB9\u8BC0\u8BC5\u8BE1\u8BEB\u8BEC\u8BF5\u8C0E\u8C1A\u8D2E\u8D31\u8D43\u8D4E\u8D58\u8F67\u8F7F\u9489\u9499\u949D\u94A0\u94A5\u94AE\u94BE\u94D0\u94DB\u94F2\u9508\u950C\u951A\u9525\u952D\u952F\u9530\u953B\u9540\u9550\u9570\u9576\u95F0\u960E\u9668\u96CF\u97E7\u9885\u988A\u98A4\u9965\u9975\u997A\u997F\u9985\u998D\u998F\u9A6E\u9A6F\u9A74\u9A79\u9A7C\u9A82\u9A87\u9CA4\u9CC4\u9CCD\u9CD6\u9E20\u9E25\u9E35\u9E3D\u9E45\u9E49\u9E4A\u9E66 -NGram.KANJI_4_16=\u576F\u579B\u6345\u78B4\u79EB\u79F8 -NGram.KANJI_4_17=\u4E13\u4E1A\u4E1C\u4E24\u4E25\u4E2A\u4E3E\u4E49\u4E50\u4E66\u4E9A\u4EA7\u4EBF\u4ECE\u4EEC\u4EF7\u4F17\u4F20\u5170\u5173\u519B\u51B3\u51E4\u51FB\u5219\u521B\u522B\u529E\u52A1\u52A8\u52BF\u534F\u5355\u536B\u5386\u53BF\u53D1\u53D8\u542F\u5458\u54CD\u56E2\u56ED\u56F4\u56FE\u573A\u5904\u590D\u5934\u5B81\u5B9E\u5BF9\u5BFC\u5C14\u5C9B\u5E26\u5E7F\u5E94\u5F00\u5F20\u5F3A\u603B\u6218\u65E0\u65F6\u663E\u672F\u6743\u6784\u6807\u6C14\u6C49\u707E\u70ED\u73AF\u73B0\u7535\u76D1\u786E\u79CD\u79EF\u7B80\u7C7B\u7EA2\u7EA6\u7EA7\u7EAA\u7EBF\u7EC4\u7EC7\u7ED3\u7EDF\u7EE7\u7EED\u7EF4\u7F16\u7F57\u804C\u8054\u817E\u8282\u82CF\u83B7\u8425\u89C1\u89C2\u89C4\u89C6\u8BA1\u8BA4\u8BAE\u8BAF\u8BB0\u8BB8\u8BBA\u8BBE\u8BC1\u8BC4\u8BD1\u8BDD\u8BE5\u8BED\u8BF4\u8C03\u8D22\u8D23\u8D28\u8D39\u8D44\u8D5B\u8F66\u8F6C\u8F83\u8FBE\u8FC7\u8FD0\u8FD8\u8FD9\u8FDB\u8FDE\u9009\u94C1\u957F\u95E8\u95EE\u95F4\u95FB\u961F\u9633\u9645\u9646\u96BE\u9879\u9884\u9886\u9898\u98CE\u9A6C\u9F99 -NGram.KANJI_4_18=\u51DB\u67B7 -NGram.KANJI_4_22=\u4FA5\u545B\u5499\u5520\u5570\u56F1\u5A76\u5C96\u60AF\u60ED\u618B\u61A8\u62A0\u62A1\u62E7\u6363\u6390\u63B0\u6400\u6402\u6512\u6748\u70C1\u732C\u765E\u7663\u76CF\u7741\u781A\u7980\u79C6\u79FD\u7AA5\u7B0B\u7B8D\u7BA9\u7BAB\u7BD3\u7CAA\u7EAB\u7ECA\u7EE2\u7F2D\u7F30\u8110\u8113\u81CA\u835A\u8360\u84D6\u852B\u87E5\u8869\u8A8A\u8BA5\u8BF2\u8C05\u8C12\u8D30\u8D4A\u8D61\u8DF7\u8E6D\u8E8F\u8F95\u8F99\u8FAB\u94B3\u94C6\u94E3\u9504\u954A\u9563\u95FA\u9893\u9981\u9992\u9AA1\u9CAB\u9E2F\u9E33\u9EB8 -NGram.KANJI_4_24=\u4E22\u4E8F\u4F1E\u4FA3\u5151\u517D\u51BB\u51D1\u5220\u529D\u52CB\u5367\u5389\u5395\u53E0\u53F9\u5413\u548F\u5524\u575E\u575F\u5784\u5792\u57A6\u57AB\u58F3\u5986\u5988\u5A04\u5A07\u5BA0\u5C18\u5C82\u5DE9\u5E10\u5E1C\u5F2F\u60E9\u6124\u629B\u6321\u6324\u635E\u63FD\u6401\u644A\u6491\u655B\u658B\u6635\u67AB\u67DC\u680B\u692D\u6984\u6A31\u6B7C\u6BD9\u6C22\u6CA6\u6CA7\u6CEA\u6CFB\u6CFC\u6D46\u6D47\u6D4A\u6D51\u6DA1\u6E0A\u6E83\u6EE4\u6EE5\u6F9C\u6FD2\u70C2\u7237\u727A\u730E\u7574\u75AF\u7792\u7816\u7845\u78B1\u7A77\u7A91\u7A9D\u7AD6\u7B3C\u7B5B\u7CAE\u7EA4\u7EB1\u7EBA\u7ECE\u7ED1\u7EF0\u7EF3\u7F14\u7F1D\u7F34\u7F62\u8042\u806A\u80A0\u80A4\u80BE\u80BF\u80C0\u810F\u8138\u8231\u8270\u829C\u82CD\u8350\u83B9\u841D\u8574\u8680\u8BB3\u8BBC\u8BBD\u8BC8\u8BF1\u8BFD\u8C0A\u8C0D\u8C1C\u8C24\u8C26\u8C2C\u8C2D\u8C34\u8D1E\u8D2C\u8D3C\u8D41\u8D42\u8D4C\u8D50\u8D5A\u8F69\u8F88\u8F90\u8FA9\u915D\u9171\u9493\u949E\u94A7\u94A9\u94BB\u94C3\u94C5\u94DD\u94F8\u9505\u9510\u9523\u9524\u95EF\u95F7\u95F9\u9600\u9610\u96F3\u97F5\u987D\u9882\u9888\u9896\u98D8\u9971\u9972\u9976\u997C\u9A84\u9A86\u9A8F\u9A97\u9A9A\u9AA4\u9CB8\u9CDE\u9E26\u9E43\u9E64\u9E70\u9F7F\u9F9F -NGram.KANJI_4_28=\u534E\u62A5\u7ECF\u7F51 -NGram.KANJI_4_34=\u4E34\u4E3D\u4E4C\u4E54\u4E60\u4E61\u4E70\u4EB2\u4EC5\u4EEA\u4F18\u4F1F\u4F24\u4F26\u4FA7\u50A8\u513F\u5174\u517B\u518C\u519C\u51B5\u51CF\u5218\u521A\u5267\u52B3\u5356\u5382\u5385\u538B\u53A6\u5434\u5706\u5723\u5757\u575A\u575B\u575D\u5907\u591F\u593A\u5956\u5B59\u5BA1\u5BAB\u5BBD\u5BBE\u5BFB\u5C42\u5C81\u5E01\u5E08\u5E86\u5E93\u5F02\u5F39\u5F52\u5F55\u5F7B\u6000\u6001\u6076\u620F\u6237\u6267\u6269\u626C\u62A2\u62A4\u62DF\u62E5\u62E9\u6325\u635F\u6362\u6444\u6653\u6682\u6740\u6742\u6768\u6781\u6811\u6837\u6865\u68C0\u6B22\u6BC1\u6BD5\u6C47\u6C9F\u6CAA\u6CFD\u6D4B\u6DA8\u6E10\u6EE1\u6EE8\u706D\u7075\u70DF\u7231\u739B\u7597\u76D6\u76D8\u77FF\u7801\u7840\u79BB\u7A33\u7ADE\u7B14\u7B7E\u7CA4\u7D27\u7EB3\u7EBD\u7EC3\u7EC6\u7EC8\u7ECD\u7ED5\u7ED9\u7EDC\u7EDD\u7EE9\u7EFC\u7EFF\u7F13\u7F29\u8083\u80DC\u8111\u814A\u8230\u827A\u8363\u836F\u8428\u84DD\u867D\u8865\u88AD\u89C8\u8BA2\u8BA8\u8BA9\u8BAD\u8BB2\u8BBF\u8BC6\u8BCD\u8BD5\u8BEF\u8BF7\u8BF8\u8BFA\u8BFB\u8C08\u8D1D\u8D1F\u8D21\u8D25\u8D27\u8D2D\u8D2F\u8D35\u8D38\u8DC3\u8F6E\u8F6F\u8F7B\u8F7D\u8F86\u8F91\u8F93\u8F96\u8FB9\u8FBD\u8FC1\u8FDC\u8FDD\u9002\u9057\u90BB\u90D1\u91CA\u9488\u949F\u94A2\u94B1\u94F6\u9500\u9526\u9547\u9614\u9634\u9635\u9636\u9648\u9655\u9669\u9690\u97E9\u9875\u9876\u987A\u987B\u987E\u987F\u9891\u989D\u98DE\u9986\u9A7B\u9A8C\u9C81\u9C9C\u9F50 -NGram.KANJI_4_39=\u4E1B\u4E1D\u4E27\u4EA9\u4ED1\u4ED3\u4F2A\u4FA6\u4FA8\u503A\u503E\u507F\u5188\u51AF\u51C0\u51C9\u51ED\u51EF\u5242\u5251\u52B2\u5362\u53A2\u5415\u5417\u5428\u55B7\u5760\u5899\u5939\u594B\u5987\u5A31\u5A74\u5BAA\u5C1D\u5C7F\u5C97\u5CAD\u5E05\u5E2E\u5E99\u5E9E\u5E9F\u5F03\u5FC6\u5FE7\u60AC\u60CA\u60EF\u626B\u6270\u629A\u62E6\u62E8\u6446\u6447\u654C\u67AA\u680F\u6863\u68A6\u6C64\u6D01\u6D53\u6D9D\u6DA6\u6E14\u6E17\u6EDA\u6EE9\u707F\u70BC\u70E7\u7275\u72B9\u72EE\u72F1\u743C\u7545\u76D0\u7855\u7978\u7B79\u7BEE\u7EA0\u7EAC\u7EAF\u7EB2\u7EB5\u7EB7\u7EB8\u7EB9\u7ED8\u7EEA\u7EF5\u7F05\u7F06\u7F18\u7F5A\u80C1\u80F6\u8109\u8206\u8273\u82F9\u8346\u8361\u83B2\u8427\u8651\u867E\u8854\u89C9\u8BC9\u8BCA\u8BD7\u8BDA\u8BDE\u8BE2\u8BE6\u8BFE\u8C01\u8C0B\u8C10\u8C13\u8C22\u8C23\u8C28\u8C31\u8D24\u8D26\u8D29\u8D2A\u8D2B\u8D34\u8D37\u8D3A\u8D3E\u8D3F\u8D4B\u8D4F\u8D54\u8D56\u8D5E\u8D60\u8D62\u8D75\u8D76\u8D8B\u8F68\u8F70\u8F74\u8F85\u8F89\u8FC8\u8FDF\u900A\u9012\u903B\u9093\u90AE\u917F\u9274\u94A6\u94DC\u94ED\u94FA\u94FE\u9501\u950B\u9519\u9521\u952E\u955C\u95EA\u95ED\u95F2\u95F8\u95FD\u9601\u9605\u9647\u96B6\u96FE\u9877\u9881\u9887\u9897\u989C\u98A0\u996D\u996E\u9970\u9A70\u9A71\u9A73\u9A76\u9A7E\u9A91\u9C7C\u9E1F\u9E21\u9E23\u9E2D\u9E3F\u9E4F\u9F84 -NGram.KANJI_5_10=\u5239\u8EAF -NGram.KANJI_5_11=\u51C4\u8471 -NGram.KANJI_5_12=\u6DC0\u7C98 -NGram.KANJI_5_13=\u5631\u5815\u8695 -NGram.KANJI_5_14=\u4E71\u4FA0\u5265\u52B1\u5374\u53A8\u53D9\u58EE\u5BDD\u5BFF\u5C3D\u5C4A\u5CE1\u5F25\u5F84\u604B\u60A6\u60E7\u60E8\u631F\u636E\u643A\u663C\u664B\u67A2\u6816\u697C\u6B8B\u6BB4\u6D45\u6E7F\u6EDE\u6F5C\u706F\u7089\u72ED\u732A\u732B\u76D7\u793C\u7977\u7A0E\u7A83\u80C6\u811A\u8131\u82A6\u830E\u848B\u865A\u866B\u86EE\u89E6\u8A89\u8DF5\u8E0A\u8E2A\u8F9E\u9065\u968F\u9759\u9EA6 -NGram.KANJI_5_18=\u601C\u75D2 -NGram.KANJI_5_26=\u4E07\u4E0E\u4E89\u4F1A\u4F53\u515A\u5185\u5199\u533A\u533B\u53C2\u53CC\u53F7\u58F0\u5965\u5B66\u5B9D\u5C06\u5C5E\u5F53\u62C5\u6570\u65AD\u65E7\u6761\u6765\u6A2A\u6B27\u6CA1\u6E29\u6E7E\u70B9\u72B6\u72EC\u732E\u753B\u79F0\u88C5\u9EC4 -NGram.KANJI_5_29=\u693F\u82EB -NGram.KANJI_5_34=\u53F6\u6D9B\u83B1 -NGram.KANJI_5_39=\u5C61\u788D -NGram.KANJI_6_0=\u4E10\u4E52\u4EC6\u4F88\u4FD0\u51F3\u533E\u53ED\u53EE\u5406\u541D\u5429\u5435\u5440\u5490\u5495\u54B1\u54C4\u54FC\u557C\u55D3\u5669\u56E4\u5777\u5992\u59E8\u5B7D\u5BDE\u5BE5\u5C79\u5C94\u5DCD\u5E18\u5E1A\u5E54\u5FF1\u604D\u6064\u60F6\u6127\u6177\u6233\u6252\u625B\u6273\u6296\u62C2\u62C7\u62F4\u638F\u6396\u63E3\u63EA\u6413\u6479\u64A9\u64C2\u659F\u667E\u6760\u6845\u6963\u6A90\u6B83\u6C13\u6C5E\u6D8E\u6D95\u6DCC\u6ED4\u6F13\u6F3E\u6FA1\u7076\u70D8\u710A\u71CE\u7239\u72E1\u73B7\u7599\u759A\u75A4\u75CA\u7629\u7682\u76C5\u76EF\u778E\u77AA\u787C\u7889\u788C\u78BE\u79E7\u7A96\u7A98\u7B77\u7C7D\u7CB1\u7D0A\u7D6E\u7F94\u7FCE\u8116\u814B\u814C\u819B\u828D\u82DF\u8301\u83E0\u85D5\u8611\u86A3\u8708\u8822\u8C4C\u8DB4\u8DEA\u8E42\u8E66\u8E72\u8EBA\u901B\u9157\u970E\u97ED -NGram.KANJI_6_3=\u62FC\u88D4\u9B4F -NGram.KANJI_6_9=\u4ED7\u4F63\u4FCF\u5018\u50BB\u50F5\u5154\u5201\u522E\u5254\u527F\u5306\u5462\u5492\u5496\u54A8\u54AA\u554A\u5561\u5564\u5566\u5885\u5938\u5AC2\u5AE9\u5CED\u5F64\u6084\u608D\u60A8\u60D5\u61C2\u61C8\u6254\u626F\u62AC\u6346\u634D\u640F\u6454\u6487\u6495\u64D2\u6746\u6789\u68B3\u68F5\u695E\u6986\u6995\u69A8\u6A44\u6AAC\u6B79\u6C28\u6C2E\u6CF5\u6DE4\u6E34\u6E3A\u6E89\u6F29\u70AB\u70AC\u7130\u715E\u7184\u71AC\u7238\u7281\u72E0\u74E3\u74F7\u7529\u7578\u761F\u7626\u76D4\u775B\u7779\u7784\u77BB\u780C\u780D\u7838\u7898\u78C5\u78F7\u7AED\u7B28\u7BE1\u7C07\u7CD5\u7CD9\u7CEF\u7F38\u800D\u8084\u809A\u8165\u816E\u832B\u8334\u840D\u8774\u886B\u888D\u88D9\u88F9\u8C41\u8D81\u8D9F\u8E22\u8E29\u8EB2\u8F9C\u9165\u918B\u9631\u964B\u964C\u9661\u9709\u9739\u9776\u9AD3\u9ED4 -NGram.KANJI_6_10=\u4E53\u5582\u5600\u6342\u7B06 -NGram.KANJI_6_11=\u5288\u543C\u5475\u5486\u54EE\u5598\u56BC\u5962\u5A36\u5A9A\u5B75\u5BA6\u5C38\u5C4E\u5F8A\u5F98\u627C\u62CC\u62D7\u63C9\u6930\u6954\u69D0\u6BEF\u6C90\u6CBD\u6CBE\u6F31\u6F88\u70D9\u7329\u75BC\u75F0\u7737\u77D7\u7B19\u7FB9\u803F\u80D6\u813E\u81C0\u8205\u8309\u83BD\u846B\u8517\u868C\u8759\u8815\u8859\u8B6C\u8E81\u8EAC\u90A2\u9698\u9B44 -NGram.KANJI_6_12=\u722C\u7FD4 -NGram.KANJI_6_16=\u5228\u5315\u542E\u54CE\u5509\u5527\u5543\u55B3\u55E1\u5636\u568E\u5FFF\u61E6\u6376\u642A\u6726\u74E4\u76F9\u7736\u7BD9\u8019\u80F0\u80F3\u812F\u818A\u8200\u8214\u8638\u869C\u86C0\u86C6\u86D4\u87C6\u88B1\u8902\u8C7A\u8E4B\u9119 -NGram.KANJI_6_18=\u67D2\u6ED3\u87C0\u87CB\u8DDB\u901E\u9163 -NGram.KANJI_6_20=\u4F5B\u52D2\u54C8\u62FF\u66FC\u6D59\u704C\u7586\u9ECE -NGram.KANJI_6_21=\u4E48\u4EFF\u4F19\u4FF1\u5021\u5077\u5195\u5212\u5269\u5401\u541E\u5427\u54EA\u5587\u558A\u55BB\u566A\u573E\u574E\u5783\u57AE\u584C\u58E4\u5960\u5976\u59CA\u5A1C\u5DE2\u5F99\u600E\u6015\u6263\u626D\u6293\u62C6\u62D6\u62EF\u62F1\u6316\u632A\u6380\u6389\u63D2\u641E\u64C5\u64CE\u65F1\u6664\u6735\u6770\u67EC\u6846\u684C\u68AD\u6B47\u6B49\u6B67\u6C1B\u6C27\u6C2F\u6C5B\u6C89\u6DF9\u6EAF\u70AE\u70E4\u731C\u7334\u73BB\u7470\u76FC\u788E\u789F\u78B0\u78B3\u7A0D\u7A3B\u7A57\u7CB9\u7F69\u8335\u8354\u84BF\u8DCC\u8DD1\u904F\u90A8\u9189\u9677\u9738\u978B -NGram.KANJI_6_22=\u5162\u53E8\u542D\u5501\u552C\u5639\u563F\u56B7\u6043\u60B4\u6194\u61CA\u634E\u63CD\u6414\u64AC\u6DAE\u6E43\u6F66\u7095\u7316\u733E\u7728\u7830\u78D5\u7ABF\u7FE9\u8018\u80EF\u8198\u8693\u86AA\u86AF\u874C\u8783\u879F\u8892\u8E6C -NGram.KANJI_6_23=\u4FD8\u4FEF\u501A\u5085\u5180\u526A\u5323\u54ED\u5634\u56CA\u58A9\u58F9\u5955\u5978\u59DA\u5A49\u5B55\u5BC7\u5BE8\u5D4C\u5E62\u6467\u64BC\u6500\u655E\u6572\u658C\u6670\u68CD\u68D5\u68E0\u6912\u6A0A\u6BB7\u6C9B\u6D3D\u6DC6\u6E23\u6F8E\u7011\u7092\u714C\u73AB\u7405\u7624\u76D2\u7960\u79C9\u7A20\u7BF7\u7F50\u804A\u8086\u81C2\u8292\u82DE\u852C\u857E\u859B\u8760\u8C6B\u8DBE\u8E48\u8F9F\u96A7 -NGram.KANJI_6_25=\u4E8E\u5DF2\u5FB7\u7AD9 -NGram.KANJI_6_28=\u4E58\u4ECD\u4EFD\u4F30\u4F60\u4F69\u503C\u5047\u51B0\u51F0\u5361\u5377\u53E6\u54E5\u552E\u5708\u5740\u5761\u57C3\u5821\u589E\u5979\u59C6\u5B69\u5B83\u5E15\u5E76\u5F17\u5F88\u6208\u622A\u624E\u627E\u62D4\u62DC\u63ED\u641C\u6536\u6548\u65C1\u665A\u6668\u67E5\u6B65\u6BCF\u6C61\u6CDB\u6D4E\u6D89\u6DB5\u6E38\u6EAA\u6FB3\u70B8\u745F\u7538\u7A97\u7F3A\u7F55\u805A\u8258\u827E\u82AC\u8303\u83F2\u8482\u85CF\u8DDF\u903E\u9080\u970D\u9760\u9ED1\u9ED8 -NGram.KANJI_6_29=\u634F\u6518\u7B50\u809B -NGram.KANJI_6_30=\u54A7\u57C2\u5AB3\u60CB\u6886\u8378\u85D0\u8671 -NGram.KANJI_6_32=\u5080\u5121\u51A4\u54AC\u55DC\u592D\u5DEB\u6292\u68D8\u69B4\u6A59\u6E24\u7FC5\u80DA\u8180\u86DB\u8700\u8DCB\u9761 -NGram.KANJI_6_34=\u4E30\u51E0\u542C\u613F -NGram.KANJI_6_35=\u4E56\u547B\u55FD\u5C41\u606C\u6115\u6CAE\u7119\u795F\u7CDC\u86C9\u86F9\u8713\u873B\u8757\u8925\u892A\u96F9 -NGram.KANJI_6_37=\u51B2\u5308\u5398\u54B8\u59DC\u5C4F\u5D14\u5F6D\u60E0\u6241\u6350\u699C\u6BEB\u6C6A\u6CC4\u6DEE\u6F58\u6F6D\u7199\u77EE\u7ADF\u8058\u820D\u8212\u8389\u8587\u884D\u8881\u8FA8\u8FF9\u96D5 -NGram.KANJI_6_39=\u574F\u6251\u6302 -NGram.KANJI_7_0=\u52FA\u5544\u60F0\u6994\u86A4\u86E4 -NGram.KANJI_7_3=\u4E59\u4E7E\u4EAD\u4EF0\u4EF2\u4F0F\u4F10\u4FAF\u4FCA\u500D\u501F\u5076\u508D\u50E7\u5112\u5146\u5192\u51AC\u51DD\u51FD\u5200\u5237\u524A\u52A3\u52C3\u52C7\u52DF\u5351\u5352\u5353\u5378\u537F\u53E5\u5439\u54FA\u574A\u5782\u57CB\u5893\u58C1\u5915\u5937\u5949\u5951\u5974\u59B9\u5A18\u5A5A\u5ACC\u5B54\u5B5D\u5B64\u5B8F\u5BBF\u5BD2\u5C3A\u5C6F\u5CB3\u5D07\u5DE7\u5E84\u5E8A\u5F26\u5F69\u5F70\u5F90\u5FAA\u5FCD\u6012\u6016\u602A\u60A0\u60B2\u60BC\u6148\u6162\u6170\u6291\u6298\u62AB\u62BC\u62BD\u62D2\u62D3\u62D8\u62F3\u6311\u638C\u6398\u63E1\u642C\u6458\u64A4\u654F\u656C\u659C\u65E2\u65E8\u65EC\u6606\u6614\u6676\u6691\u6696\u66F9\u6749\u676F\u679A\u679D\u67CF\u67D4\u67F1\u67F3\u67F4\u6817\u6842\u6843\u6851\u68A8\u68CB\u68D2\u6B20\u6B32\u6BBF\u6C57\u6C88\u6CCA\u6D17\u6D1E\u6D69\u6D6E\u6D78\u6DE1\u6DFB\u6E58\u6EB6\u6F0F\u6F20\u7070\u708E\u70AD\u7126\u718A\u71C3\u7267\u72C2\u731B\u7384\u73A9\u73CD\u7434\u75AB\u75DB\u76C6\u76FE\u773C\u7891\u78C1\u795D\u7965\u79D2\u79DF\u79E6\u7A00\u7B11\u7B51\u7B54\u7C89\u7C92\u7CD6\u7D2B\u7F8A\u7FBD\u7FFC\u8010\u80A5\u80CE\u8150\u8179\u819C\u8247\u829D\u82B3\u82D7\u82E6\u8302\u8336\u8352\u83CA\u83CC\u83DC\u845B\u846C\u84B2\u84B8\u84C4\u8584\u864E\u86C7\u8861\u8863\u8870\u888B\u8896\u88D5\u8986\u8C46\u8DA3\u8E0F\u8F9B\u8FC5\u8FEB\u8FF7\u9003\u9006\u902E\u9042\u9063\u90ED\u963B\u9676\u96EA\u9756\u9B3C\u9B42\u9F3B -NGram.KANJI_7_6=\u4E01\u4E03\u4E45\u4E5D\u4E88\u4E92\u4EA1\u4ECB\u4EE4\u4F01\u4F0A\u4F2F\u4F3C\u4F4E\u4F4F\u4F55\u4F8B\u4F9D\u4FBF\u4FEE\u505C\u50CF\u516B\u516D\u5175\u5177\u5178\u5207\u520A\u5224\u526F\u529F\u52A9\u5343\u5348\u535A\u5370\u53BB\u53CB\u53F3\u5409\u542B\u544A\u547C\u5584\u5747\u5802\u590F\u592B\u5931\u5947\u597D\u5A01\u5A92\u5B63\u5B8C\u5B97\u5BA2\u5BA3\u5BA4\u5BB3\u5BB9\u5BC6\u5BCC\u5BDF\u5C04\u5C1A\u5C45\u5C4B\u5CB8\u5DE6\u5E0C\u5E1D\u5E2D\u5E55\u5E8F\u5E95\u5E97\u5EA7\u5EB7\u5EF6\u5F8B\u5FAE\u5FC5\u5FD7\u5FF5\u601D\u6025\u606F\u60F3\u611F\u623F\u6253\u6279\u627F\u6295\u6297\u62EC\u6388\u6392\u63F4\u6545\u6551\u6574\u6599\u65C5\u65E9\u6613\u6620\u6625\u666E\u666F\u66B4\u66F4\u670D\u671B\u6728\u672B\u6751\u677E\u67B6\u6838\u6839\u6848\u68EE\u690D\u6982\u6A21\u6B4C\u6B62\u6B66\u6BB5\u6BCD\u6C0F\u6C38\u6C42\u6CBF\u6CE2\u6CE8\u6D0B\u6D3E\u6D88\u6DF1\u6E05\u6E56\u706B\u7167\u7206\u7236\u7247\u7387\u7530\u7537\u7559\u7565\u7591\u75C5\u767B\u767D\u767E\u7687\u76DB\u76DF\u771F\u7763\u77ED\u7834\u79FB\u7A81\u7AE0\u7AEF\u7B56\u7B97\u7C4D\u7CBE\u7D20\u7D22\u7F72\u7FA4\u8001\u8003\u81F4\u822A\u826F\u82B1\u8349\u843D\u878D\u8857\u89D2\u8B66\u8C37\u8D70\u8D85\u8D8A\u8DB3\u8FF0\u8FFD\u9001\u901F\u90A3\u90A6\u914D\u91CE\u9632\u963F\u9644\u964D\u9664\u96C4\u96E8\u9752\u9769\u98DF -NGram.KANJI_7_7=\u4E09\u4E0A\u4E0B\u4E0D\u4E16\u4E3B\u4E8B\u4E8C\u4EE3\u4EE5\u4F4D\u4F5C\u4F7F\u5165\u5168\u516C\u5171\u51FA\u5206\u5229\u5236\u524D\u529B\u52A0\u5316\u5317\u5357\u539F\u53CA\u53F0\u5408\u540C\u540D\u548C\u5730\u57FA\u5916\u591A\u5929\u5B50\u5B9A\u5BB6\u5C0F\u5C71\u5DDE\u5DE5\u5E02\u5E73\u5EA6\u5EFA\u5F0F\u6027\u6210\u6240\u6307\u653F\u6587\u65B0\u65B9\u660E\u6700\u6709\u671F\u672C\u6B21\u6B63\u6C11\u6CBB\u6CD5\u6D77\u7269\u7279\u7406\u751F\u7528\u7531\u754C\u76EE\u76F8\u793E\u79D1\u7ACB\u7B2C\u7B49\u7CFB\u8005\u80FD\u81EA\u82F1\u884C\u8868\u897F\u8981\u901A\u9053\u90E8\u90FD\u91CD\u9AD8 -NGram.KANJI_7_9=\u4E4D\u4F36\u5319\u6A61\u6DCB\u7194 -NGram.KANJI_7_11=\u4E5E\u4F43\u5026\u50FB\u515C\u5243\u5420\u5446\u54B3\u54BD\u553E\u55A7\u5703\u5984\u5AC9\u5B09\u5C51\u5DFE\u5ED3\u5F1B\u6055\u618E\u62D9\u65A7\u6652\u6977\u6EBA\u707C\u75D8\u79E4\u7AFF\u7B4F\u7CA5\u808B\u8098\u80B4\u8235\u82DB\u849C\u8549\u868A\u86FE\u8718\u914C -NGram.KANJI_7_12=\u4E08\u4E38\u4F8D\u50DA\u5203\u5256\u52C9\u52D8\u52FE\u5320\u533F\u5375\u53D4\u540F\u54E8\u56DA\u5806\u5996\u5999\u59A5\u59A8\u59FF\u5AE1\u5BB0\u5BF8\u5C09\u5C3F\u5C48\u5C65\u5D29\u5E06\u5E4C\u5EB5\u5EB6\u5EB8\u5F13\u5FCC\u5FD8\u6052\u606D\u609F\u60D1\u614E\u6247\u62B1\u6349\u64E6\u6577\u65ED\u6674\u6734\u67C4\u6850\u690E\u6A58\u6B3A\u6B89\u6C41\u6CBC\u6CCC\u6CF3\u6D74\u6DAF\u6DF3\u6ECB\u6F02\u6F84\u71E5\u7261\u7272\u72AC\u72FC\u733F\u7409\u755C\u76F2\u7720\u77AC\u77E2\u7802\u786B\u78E8\u7901\u7948\u79E9\u7A1A\u7A74\u7AE3\u7B4B\u7B52\u7BB1\u7C3F\u8015\u8096\u809D\u80A2\u80A9\u80AA\u80BA\u80F8\u8102\u810A\u8154\u8155\u8170\u817A\u81A8\u81ED\u820C\u8236\u82BD\u8305\u83E9\u83F1\u840C\u85FB\u8650\u8702\u8A93\u8E44\u8FB0\u9038\u9091\u90AA\u916C\u9175\u9177\u9685\u96C0\u96C7\u96CC\u97AD -NGram.KANJI_7_13=\u63D6\u803D -NGram.KANJI_7_16=\u602F\u7566 -NGram.KANJI_7_18=\u634C\u7C38 -NGram.KANJI_7_19=\u4E18\u4E73\u4E95\u4EAB\u4EC1\u4ED8\u4ED9\u4F11\u4F34\u4F38\u4F59\u4FB5\u4FC3\u4FD7\u5012\u5019\u5065\u50AC\u5144\u5145\u514D\u517C\u51A0\u51B7\u5211\u5238\u523A\u523B\u5272\u52E4\u5360\u5371\u539A\u541B\u5426\u5438\u5473\u54F2\u5510\u552F\u5531\u559C\u5609\u56F0\u56FA\u591C\u5948\u594F\u59BB\u59D3\u5B85\u5B87\u5B88\u5B99\u5B9C\u5BC4\u5BFA\u5C0A\u5C3E\u5CA9\u5D0E\u5DE1\u5DE8\u5DEE\u5DF1\u5E45\u5E78\u5E7B\u5E7C\u5EAD\u5EF7\u5F1F\u5F31\u5F79\u5F7C\u5F85\u5F92\u5FA1\u5FE0\u6050\u60A3\u6212\u62DB\u632F\u6355\u63A2\u63AA\u63CF\u642D\u6469\u64CD\u653B\u6563\u660C\u662D\u667A\u6697\u66FF\u6750\u675F\u677F\u6790\u67D3\u682A\u6885\u68B0\u6B8A\u6B96\u6BDB\u6C60\u6CB9\u6CC9\u6D25\u6D66\u6DB2\u6DF7\u6E21\u6ED1\u6F2B\u6F6E\u6FC0\u7235\u725B\u72AF\u7389\u7532\u7533\u756A\u75BE\u75C7\u76AE\u76CA\u7740\u786C\u7956\u7968\u796D\u7981\u79C0\u79C1\u79CB\u79D8\u7A3F\u7AE5\u7AF9\u7E41\u7F6A\u7FFB\u8089\u80CC\u80DE\u81E3\u821E\u8239\u82E5\u8328\u8377\u85E4\u8840\u88C1\u88C2\u8C6A\u8D64\u8DDD\u8FCE\u8FD4\u9000\u9014\u907F\u90CA\u90CE\u90E1\u9152\u9178\u9686\u9694\u969C\u9707\u9732\u9AA8\u9B54\u9E7F\u9EBB -NGram.KANJI_7_20=\u4E39\u4E43\u4EAE\u4F73\u504F\u505A\u51C6\u51CC\u52AA\u5339\u5347\u53EB\u53EC\u5448\u5766\u57F9\u5854\u585E\u58A8\u5B8B\u5C01\u5CF0\u5E72\u5EC9\u5F80\u5F81\u5FBD\u5FEB\u6069\u6211\u624D\u628A\u62B5\u62CD\u6309\u63A7\u64AD\u6566\u6597\u65CB\u65D7\u6628\u6717\u6731\u674E\u675C\u683D\u6881\u6B3E\u6BD2\u6C7D\u6C99\u6CE5\u6CF0\u6D1B\u6D2A\u70C8\u719F\u724C\u7259\u73E0\u73ED\u745E\u74E6\u7518\u751A\u7686\u770B\u7B26\u8033\u80A1\u80E1\u821F\u83AB\u8499\u8D74\u8DE8\u900F\u9010\u9047\u904D\u906D\u9675\u96C5\u96F6\u96F7\u9700\u9F13 -NGram.KANJI_7_21=\u5764\u59D0\u5A03\u6062\u6108\u68C9\u7164\u79BE\u7BAD\u903C -NGram.KANJI_7_23=\u4EA5\u50B2\u532A\u5366\u543B\u54E9\u5632\u59D1\u5BB5\u5DF7\u5F6A\u5F6C\u5FFD\u6070\u6168\u61BE\u63A0\u63A9\u6478\u65A4\u68A7\u6A1F\u6CAB\u70F9\u711A\u723D\u7262\u72F8\u751C\u754F\u75B9\u76C8\u7709\u7897\u7CCA\u7F9E\u8299\u82AD\u82B9\u82D4\u8304\u84C9\u84EC\u854A\u85AF\u86D9\u8FA3\u9187\u97A0 -NGram.KANJI_7_25=\u4E14\u4E5F\u4F46\u514B\u5176\u5230\u5373\u53EA\u540E\u5982\u5C3C\u5DF4\u6216\u62C9\u65AF\u66FE\u6B64\u6D32\u6D6A\u7BC7\u800C -NGram.KANJI_7_28=\u4E4E\u4E9B\u4EA6\u4EC0\u4FC4\u5403\u5957\u5C24\u6089\u6258\u67D0\u758F\u7FF0\u8D6B -NGram.KANJI_7_29=\u4FAE\u5944\u5A29\u6101\u62ED\u6328\u637B\u6666\u6687\u66AE\u673D\u6756\u67FF\u6813\u68A2\u699B\u7078\u708A\u7396\u7422\u7525\u75E2\u76BF\u7766\u77B3\u7A3C\u7A92\u819D\u81FC\u8237\u8338\u8511\u88F3\u8FC2 -NGram.KANJI_7_32=\u4E11\u4F3A\u4F51\u5197\u51B6\u51F9\u52FF\u541F\u5507\u5589\u5993\u5A7F\u5AC1\u5B9B\u5BC2\u5BE1\u5F04\u5F0A\u5F27\u6020\u6028\u6068\u6094\u6109\u611A\u614C\u621A\u62B9\u62D0\u62F7\u62FE\u632B\u633D\u6367\u660F\u6627\u6643\u66D9\u674F\u6795\u67AF\u67D1\u6876\u68DA\u68FA\u6905\u69FD\u6A80\u6B6A\u6CB8\u6CE3\u6DD1\u6DEB\u6E9C\u6EA2\u6EF4\u6F06\u714E\u716E\u722A\u7280\u74A7\u752B\u75B2\u75D5\u75F4\u77AD\u77E9\u785D\u79BD\u7A3D\u7A9F\u7B1B\u7B95\u7C9F\u7CDF\u80C3\u8106\u817F\u818F\u81B3\u828B\u82A5\u82AF\u840E\u851A\u853D\u8776\u87F9\u8877\u8910\u8912\u8C79\u8D66\u8FB1\u9017\u90C1\u916A\u9699\u96C1\u971C\u9774\u978D -NGram.KANJI_7_33=\u4E4B\u4E86\u4E94\u4EA4\u4EAC\u4ECA\u4ED6\u4EF6\u4EFB\u4F9B\u4FDD\u4FE1\u5143\u5148\u5149\u518D\u5217\u521D\u5305\u5341\u534A\u53C8\u53CD\u53D6\u53D7\u53E3\u53E4\u53EF\u53F2\u53F8\u5404\u5411\u5468\u547D\u54C1\u5546\u5668\u56DB\u56DE\u56E0\u571F\u578B\u57CE\u57DF\u5883\u58EB\u592A\u592E\u5973\u59CB\u59D4\u5B57\u5B58\u5B89\u5B98\u5C11\u5C31\u5C40\u5C55\u5DDD\u5E03\u5E38\u5E9C\u5F15\u5F62\u5F71\u5F97\u5FC3\u60C5\u610F\u624B\u6280\u6301\u63A5\u63A8\u63D0\u652F\u6539\u653E\u6559\u65BD\u65CF\u661F\u66F2\u671D\u672A\u6797\u679C\u6821\u683C\u6B7B\u6BD4\u6C34\u6C5F\u6CB3\u6D3B\u6D41\u6E2F\u6E90\u6F14\u7136\u7248\u738B\u7403\u76F4\u7701\u77E5\u77F3\u7814\u793A\u795E\u798F\u7A0B\u7A76\u7A7A\u7BA1\u7C73\u7F6E\u7F8E\u80B2\u81F3\u822C\u8272\u8457\u88AB\u89E3\u8A00\u8C61\u8D77\u8DEF\u8EAB\u8FD1\u9020\u91CC\u91CF\u91D1\u9650\u9662\u96C6\u975E\u9762\u97F3\u9996\u9999 -NGram.KANJI_7_35=\u55C5\u57A2\u58D5\u59E5\u637A\u74E2\u7CE0\u895F -NGram.KANJI_7_37=\u4E19\u4E32\u4E4F\u4E91\u4EC7\u4ED4\u4F0D\u5141\u51E1\u51F6\u51F8\u52AB\u535C\u53C9\u53DB\u540A\u5410\u54C0\u559D\u5750\u5751\u576A\u57E0\u5824\u582A\u5830\u5835\u5851\u5858\u586B\u5954\u59FB\u5A46\u5B5F\u5BB4\u5BD3\u5C16\u5C60\u5CFB\u5D16\u5E16\u5E3D\u5E7D\u5E87\u5ECA\u5FD9\u60DC\u60F9\u6155\u6167\u6234\u626E\u6276\u6284\u633A\u6377\u6492\u649E\u64B0\u6562\u6591\u65A5\u65E6\u65FA\u6602\u670B\u676D\u68AF\u695A\u6B23\u6BC5\u6C70\u6C83\u6CE1\u6D8C\u6DD8\u6E20\u71D5\u72D0\u72D7\u73B2\u73CA\u7433\u7483\u74DC\u74F6\u7554\u764C\u7761\u77DB\u78A7\u7A46\u7A7F\u7A84\u7C97\u7D2F\u7FC1\u7FE0\u8000\u8017\u808C\u80AF\u8404\u8461\u8463\u8475\u8513\u85AA\u8679\u86CB\u871C\u87BA\u88F8\u8C8C\u8DF3\u8FC4\u901D\u9022\u906E\u9075\u9192\u91C7\u966A\u971E\u9910\u9B41\u9F0E\u9F20 diff --git a/src/test/java/org/xbib/elasticsearch/common/langdetect/DetectorTest.java b/src/test/java/org/xbib/elasticsearch/common/langdetect/DetectorTest.java deleted file mode 100644 index 25dcb18..0000000 --- a/src/test/java/org/xbib/elasticsearch/common/langdetect/DetectorTest.java +++ /dev/null @@ -1,85 +0,0 @@ -package org.xbib.elasticsearch.common.langdetect; - -import java.util.List; - -import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.common.settings.Settings; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - -public class DetectorTest extends Assert { - - private static final String UNKNOWN_LANG = "unknown"; - - private static final String TRAINING_EN = "a a a b b c c d e"; - - private static final String TRAINING_FR = "a b b c c c d d d"; - - private static final String TRAINING_JA = "\u3042 \u3042 \u3042 \u3044 \u3046 \u3048 \u3048"; - - private Detector detect; - - @BeforeClass - public void setUp() throws Exception { - - detect = new Detector(); - - LangProfile profile_en = new LangProfile("en"); - for (String w : TRAINING_EN.split(" ")) { - profile_en.add(w); - } - detect.addProfile(profile_en, 0, 3); - - LangProfile profile_fr = new LangProfile("fr"); - for (String w : TRAINING_FR.split(" ")) { - profile_fr.add(w); - } - detect.addProfile(profile_fr, 1, 3); - - LangProfile profile_ja = new LangProfile("ja"); - for (String w : TRAINING_JA.split(" ")) { - profile_ja.add(w); - } - detect.addProfile(profile_ja, 2, 3); - - detect.reset(); - - } - - @Test - public void testDetector1() { - assertEquals(detect.detect("a"), "en"); - } - - @Test - public void testDetector2() { - assertEquals(detect.detect("b d"), "fr"); - } - - @Test - public void testDetector3() { - assertEquals(detect.detect("d e"), "en"); - } - - @Test - public void testDetector4() { - assertEquals(detect.detect("\u3042\u3042\u3042\u3042a"), "ja"); - } - - @Test - public void testLangList() { - List langList = detect.getLangList(); - assertEquals(langList.size(), 3); - assertEquals(langList.get(0), "en"); - assertEquals(langList.get(1), "fr"); - assertEquals(langList.get(2), "ja"); - } - - @Test - public void testPunctuation() { - assertEquals(detect.detect("..."), UNKNOWN_LANG); - } - - -} \ No newline at end of file diff --git a/src/test/java/org/xbib/elasticsearch/common/langdetect/SimpleDetectorTest.java b/src/test/java/org/xbib/elasticsearch/common/langdetect/SimpleDetectorTest.java deleted file mode 100644 index 846fee9..0000000 --- a/src/test/java/org/xbib/elasticsearch/common/langdetect/SimpleDetectorTest.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.xbib.elasticsearch.common.langdetect; - -import org.testng.Assert; -import org.testng.annotations.Test; - -public class SimpleDetectorTest extends Assert { - - @Test - public final void testDetector() throws Exception { - - Detector detect = new Detector(); - detect.loadDefaultProfiles(); - assertEquals("de", detect.detect("Das kann deutsch sein")); - detect.reset(); - assertEquals("en", detect.detect("This is a very small test")); - } - -} \ No newline at end of file diff --git a/src/test/java/org/xbib/elasticsearch/common/langdetect/DetectLanguageTest.java b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/DetectLanguageTest.java similarity index 57% rename from src/test/java/org/xbib/elasticsearch/common/langdetect/DetectLanguageTest.java rename to src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/DetectLanguageTest.java index 2807085..e1f0037 100644 --- a/src/test/java/org/xbib/elasticsearch/common/langdetect/DetectLanguageTest.java +++ b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/DetectLanguageTest.java @@ -1,8 +1,10 @@ -package org.xbib.elasticsearch.common.langdetect; +package org.xbib.elasticsearch.index.mapper.langdetect; import org.elasticsearch.common.io.Streams; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.junit.Assert; +import org.junit.Test; +import org.xbib.elasticsearch.module.langdetect.LangdetectService; import java.io.IOException; import java.io.InputStreamReader; @@ -14,22 +16,22 @@ public class DetectLanguageTest extends Assert { @Test public void testEnglish() throws IOException { - testLanguage("/english.txt", "en"); + testLanguage("english.txt", "en"); } @Test public void testChinese() throws IOException { - testLanguage("/chinese.txt", "zh-cn"); + testLanguage("chinese.txt", "zh-cn"); } @Test public void testJapanese() throws IOException { - testLanguage("/japanese.txt", "ja"); + testLanguage("japanese.txt", "ja"); } @Test public void testKorean() throws IOException { - testLanguage("/korean.txt", "ko"); + testLanguage("korean.txt", "ko"); } private void testLanguage(String path, String lang) throws IOException { @@ -38,9 +40,9 @@ private void testLanguage(String path, String lang) throws IOException { Streams.copy(reader, writer); reader.close(); writer.close(); - Detector detect = new Detector(); - detect.loadDefaultProfiles(); - assertEquals(detect.detect(writer.toString()), lang); + LangdetectService detect = new LangdetectService(ImmutableSettings.EMPTY); + detect.start(); + assertEquals(detect.detectAll(writer.toString()).get(0).getLanguage(), lang); } } diff --git a/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/DetectorTest.java b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/DetectorTest.java new file mode 100644 index 0000000..3f38ab7 --- /dev/null +++ b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/DetectorTest.java @@ -0,0 +1,79 @@ +package org.xbib.elasticsearch.index.mapper.langdetect; + +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.xbib.elasticsearch.index.analysis.langdetect.LangProfile; +import org.xbib.elasticsearch.index.analysis.langdetect.LanguageDetectionException; +import org.xbib.elasticsearch.module.langdetect.LangdetectService; + +public class DetectorTest extends Assert { + + private static final String TRAINING_EN = "a a a b b c c d e"; + + private static final String TRAINING_FR = "a b b c c c d d d"; + + private static final String TRAINING_JA = "\u3042 \u3042 \u3042 \u3044 \u3046 \u3048 \u3048"; + + private static LangdetectService detect; + + @BeforeClass + public static void setUp() throws Exception { + + Settings settings = ImmutableSettings.settingsBuilder() + .put("languages", "") + .build(); + detect = new LangdetectService(settings); + detect.start(); + + LangProfile profile_en = new LangProfile("en"); + for (String w : TRAINING_EN.split(" ")) { + profile_en.add(w); + } + detect.addProfile(profile_en, 0, 3); + + LangProfile profile_fr = new LangProfile("fr"); + for (String w : TRAINING_FR.split(" ")) { + profile_fr.add(w); + } + detect.addProfile(profile_fr, 1, 3); + + LangProfile profile_ja = new LangProfile("ja"); + for (String w : TRAINING_JA.split(" ")) { + profile_ja.add(w); + } + detect.addProfile(profile_ja, 2, 3); + + //detect.reset(); + } + + @Test + public void testDetector1() throws LanguageDetectionException { + assertEquals(detect.detectAll("a").get(0).getLanguage(), "en"); + } + + @Test + public void testDetector2() throws LanguageDetectionException { + assertEquals(detect.detectAll("b d").get(0).getLanguage(), "fr"); + } + + @Test + public void testDetector3() throws LanguageDetectionException { + assertEquals(detect.detectAll("d e").get(0).getLanguage(), "en"); + } + + @Test + public void testDetector4() throws LanguageDetectionException { + assertEquals(detect.detectAll("\u3042\u3042\u3042\u3042a").get(0).getLanguage(), "ja"); + } + + @Test + public void testPunctuation() throws LanguageDetectionException { + assertTrue(detect.detectAll("...").isEmpty()); + } + + +} \ No newline at end of file diff --git a/src/test/java/org/xbib/elasticsearch/common/langdetect/LangProfileTest.java b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LangProfileTest.java similarity index 77% rename from src/test/java/org/xbib/elasticsearch/common/langdetect/LangProfileTest.java rename to src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LangProfileTest.java index 664e6b9..8aaf3b6 100644 --- a/src/test/java/org/xbib/elasticsearch/common/langdetect/LangProfileTest.java +++ b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LangProfileTest.java @@ -1,7 +1,8 @@ -package org.xbib.elasticsearch.common.langdetect; +package org.xbib.elasticsearch.index.mapper.langdetect; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.junit.Assert; +import org.junit.Test; +import org.xbib.elasticsearch.index.analysis.langdetect.LangProfile; public class LangProfileTest extends Assert { @@ -24,7 +25,7 @@ public final void testAdd() { assertEquals((int) profile.freq.get("a"), 1); profile.add("a"); assertEquals((int) profile.freq.get("a"), 2); - profile.omitLessFreq(); + //profile.omitLessFreq(); } @Test @@ -60,15 +61,10 @@ public final void testOmitLessFreq() { assertEquals((int) profile.freq.get("a"), 5); assertEquals((int) profile.freq.get("\u3042"), 5); assertEquals((int) profile.freq.get("\u3050"), 1); - profile.omitLessFreq(); - assertEquals(profile.freq.get("a"), null); - assertEquals((int) profile.freq.get("\u3042"), 5); - assertEquals(profile.freq.get("\u3050"), null); + //profile.omitLessFreq(); + //assertEquals(profile.freq.get("a"), null); + //assertEquals((int) profile.freq.get("\u3042"), 5); + //assertEquals(profile.freq.get("\u3050"), null); } - @Test - public final void testOmitLessFreqIllegally() { - LangProfile profile = new LangProfile(); - profile.omitLessFreq(); - } } diff --git a/src/test/java/org/xbib/elasticsearch/module/langdetect/LangdetectMappingTest.java b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LangdetectMappingTest.java similarity index 50% rename from src/test/java/org/xbib/elasticsearch/module/langdetect/LangdetectMappingTest.java rename to src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LangdetectMappingTest.java index 28ce402..03813ee 100644 --- a/src/test/java/org/xbib/elasticsearch/module/langdetect/LangdetectMappingTest.java +++ b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LangdetectMappingTest.java @@ -1,109 +1,118 @@ -package org.xbib.elasticsearch.module.langdetect; +package org.xbib.elasticsearch.index.mapper.langdetect; -import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.base.Charsets; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.collect.Maps; +import org.elasticsearch.common.inject.Injector; +import org.elasticsearch.common.inject.ModulesBuilder; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.settings.SettingsModule; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.EnvironmentModule; import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexNameModule; +import org.elasticsearch.index.analysis.AnalysisModule; import org.elasticsearch.index.analysis.AnalysisService; -import org.elasticsearch.index.analysis.AnalyzerProviderFactory; -import org.elasticsearch.index.analysis.AnalyzerScope; -import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory; import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatService; import org.elasticsearch.index.codec.postingsformat.PostingsFormatService; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentMapperParser; import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.settings.IndexSettingsModule; import org.elasticsearch.index.similarity.SimilarityLookupService; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; -import org.xbib.elasticsearch.common.langdetect.Detector; -import org.xbib.elasticsearch.index.mapper.langdetect.LangdetectMapper; +import org.elasticsearch.indices.analysis.IndicesAnalysisModule; +import org.elasticsearch.indices.analysis.IndicesAnalysisService; +import org.junit.Assert; +import org.junit.Test; import java.io.IOException; -import java.util.Map; +import java.io.InputStreamReader; -import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath; +import static org.elasticsearch.common.io.Streams.copyToString; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; public class LangdetectMappingTest extends Assert { - private DocumentMapperParser mapperParser; - - @BeforeClass - public void setupMapperParser() throws IOException { - Index index = new Index("test"); - Map analyzerFactoryFactories = Maps.newHashMap(); - analyzerFactoryFactories.put("keyword", - new PreBuiltAnalyzerProviderFactory("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer())); - Settings settings = ImmutableSettings.Builder.EMPTY_SETTINGS; - AnalysisService analysisService = new AnalysisService(index, settings, null, analyzerFactoryFactories, null, null, null); - mapperParser = new DocumentMapperParser(index, - settings, - analysisService, - new PostingsFormatService(index), - new DocValuesFormatService(index), - new SimilarityLookupService(index, settings), - null); - Detector detector = new Detector(settings); - detector.start(); - mapperParser.putTypeParser(LangdetectMapper.CONTENT_TYPE, - new LangdetectMapper.TypeParser(detector)); - } - @Test public void testSimpleMappings() throws Exception { - String mapping = copyToStringFromClasspath("/simple-mapping.json"); - DocumentMapper docMapper = mapperParser.parse(mapping); - - String sampleText = copyToStringFromClasspath("/english.txt"); + String mapping = copyToStringFromClasspath("simple-mapping.json"); + DocumentMapper docMapper = createMapperParser().parse(mapping); + String sampleText = copyToStringFromClasspath("english.txt"); BytesReference json = jsonBuilder().startObject().field("_id", 1).field("someField", sampleText).endObject().bytes(); ParseContext.Document doc = docMapper.parse(json).rootDoc(); - assertEquals(doc.get(docMapper.mappers().smartName("someField").mapper().names().indexName()), sampleText); assertEquals(doc.getFields("someField.lang").length, 1); assertEquals(doc.getFields("someField.lang")[0].stringValue(), "en"); // re-parse it String builtMapping = docMapper.mappingSource().string(); - docMapper = mapperParser.parse(builtMapping); - + docMapper = createMapperParser().parse(builtMapping); json = jsonBuilder().startObject().field("_id", 1).field("someField", sampleText).endObject().bytes(); doc = docMapper.parse(json).rootDoc(); - assertEquals(doc.get(docMapper.mappers().smartName("someField").mapper().names().indexName()), sampleText); assertEquals(doc.getFields("someField.lang").length, 1); assertEquals(doc.getFields("someField.lang")[0].stringValue(), "en"); } @Test - public void testBase64() throws Exception { - String mapping = copyToStringFromClasspath("/base64-mapping.json"); - DocumentMapper docMapper = mapperParser.parse(mapping); - - String sampleBinary = copyToStringFromClasspath("/base64.txt"); - String sampleText = copyToStringFromClasspath("/base64-decoded.txt"); - + public void testBinary() throws Exception { + Settings settings = ImmutableSettings.EMPTY; + String mapping = copyToStringFromClasspath("base64-mapping.json"); + DocumentMapper docMapper = createMapperParser(settings).parse(mapping); + String sampleBinary = copyToStringFromClasspath("base64.txt"); + String sampleText = copyToStringFromClasspath("base64-decoded.txt"); BytesReference json = jsonBuilder().startObject().field("_id", 1).field("someField", sampleBinary).endObject().bytes(); ParseContext.Document doc = docMapper.parse(json).rootDoc(); - assertEquals(doc.get(docMapper.mappers().smartName("someField").mapper().names().indexName()), sampleText); assertEquals(doc.getFields("someField.lang").length, 1); assertEquals(doc.getFields("someField.lang")[0].stringValue(), "en"); // re-parse it String builtMapping = docMapper.mappingSource().string(); - docMapper = mapperParser.parse(builtMapping); - + docMapper = createMapperParser(settings).parse(builtMapping); json = jsonBuilder().startObject().field("_id", 1).field("someField", sampleText).endObject().bytes(); doc = docMapper.parse(json).rootDoc(); - assertEquals(doc.get(docMapper.mappers().smartName("someField").mapper().names().indexName()), sampleText); assertEquals(doc.getFields("someField.lang").length, 1); assertEquals(doc.getFields("someField.lang")[0].stringValue(), "en"); } + private DocumentMapperParser createMapperParser() throws IOException { + return createMapperParser(ImmutableSettings.EMPTY); + } + + private DocumentMapperParser createMapperParser(Settings fromSettings) throws IOException { + Index index = new Index("test"); + Settings settings = ImmutableSettings.settingsBuilder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put(fromSettings) + .build(); + Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), + new EnvironmentModule(new Environment(settings)), + new IndicesAnalysisModule()) + .createInjector(); + Injector injector = new ModulesBuilder().add( + new IndexSettingsModule(index, settings), + new IndexNameModule(index), + new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class))) + .createChildInjector(parentInjector); + AnalysisService service = injector.getInstance(AnalysisService.class); + DocumentMapperParser mapperParser = new DocumentMapperParser(index, + settings, + service, + new PostingsFormatService(index), + new DocValuesFormatService(index), + new SimilarityLookupService(index, settings), + null + ); + mapperParser.putTypeParser(LangdetectMapper.CONTENT_TYPE, new LangdetectMapper.TypeParser()); + return mapperParser; + } + + public String copyToStringFromClasspath(String path) throws IOException { + return copyToString(new InputStreamReader(getClass().getResource(path).openStream(), Charsets.UTF_8)); + } } diff --git a/src/test/java/org/xbib/elasticsearch/common/langdetect/LanguageTest.java b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LanguageTest.java similarity index 67% rename from src/test/java/org/xbib/elasticsearch/common/langdetect/LanguageTest.java rename to src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LanguageTest.java index 319c261..45bddb8 100644 --- a/src/test/java/org/xbib/elasticsearch/common/langdetect/LanguageTest.java +++ b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LanguageTest.java @@ -1,7 +1,9 @@ -package org.xbib.elasticsearch.common.langdetect; +package org.xbib.elasticsearch.index.mapper.langdetect; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.junit.Assert; +import org.junit.Test; + +import org.xbib.elasticsearch.index.analysis.langdetect.Language; public class LanguageTest extends Assert { @@ -15,7 +17,5 @@ public final void testLanguage() { Language lang2 = new Language("en", 1.0); assertEquals(lang2.getLanguage(), "en"); assertEquals(lang2.getProbability(), 1.0, 0.0001); - assertEquals(lang2.getLanguage(), "en"); - assertEquals(lang2.getProbability(), 1.0); } } diff --git a/src/test/java/org/xbib/elasticsearch/common/langdetect/NGramTest.java b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/NGramTest.java similarity index 96% rename from src/test/java/org/xbib/elasticsearch/common/langdetect/NGramTest.java rename to src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/NGramTest.java index c73c501..d2985ba 100644 --- a/src/test/java/org/xbib/elasticsearch/common/langdetect/NGramTest.java +++ b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/NGramTest.java @@ -1,7 +1,9 @@ -package org.xbib.elasticsearch.common.langdetect; +package org.xbib.elasticsearch.index.mapper.langdetect; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.junit.Assert; +import org.junit.Test; + +import org.xbib.elasticsearch.index.analysis.langdetect.NGram; public class NGramTest extends Assert { diff --git a/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/SimpleDetectorTest.java b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/SimpleDetectorTest.java new file mode 100644 index 0000000..65f1b50 --- /dev/null +++ b/src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/SimpleDetectorTest.java @@ -0,0 +1,22 @@ +package org.xbib.elasticsearch.index.mapper.langdetect; + +import org.elasticsearch.common.settings.ImmutableSettings; +import org.junit.Assert; +import org.junit.Test; + +import org.xbib.elasticsearch.module.langdetect.LangdetectService; + +public class SimpleDetectorTest extends Assert { + + @Test + public final void testDetector() throws Exception { + + LangdetectService detect = new LangdetectService(ImmutableSettings.EMPTY); + detect.start(); + //detect.loadDefaultProfiles(); + assertEquals("de", detect.detectAll("Das kann deutsch sein").get(0).getLanguage()); + //detect.reset(); + assertEquals("en", detect.detectAll("This is a very small test").get(0).getLanguage()); + } + +} \ No newline at end of file diff --git a/src/test/resources/base64-mapping.json b/src/test/resources/base64-mapping.json deleted file mode 100644 index d101082..0000000 --- a/src/test/resources/base64-mapping.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "someType" : { - "properties" : { - "someField":{ "type" : "langdetect" } - } - } -} \ No newline at end of file diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties deleted file mode 100644 index 185ce84..0000000 --- a/src/test/resources/log4j.properties +++ /dev/null @@ -1,5 +0,0 @@ -log4j.rootLogger=INFO, out - -log4j.appender.out=org.apache.log4j.ConsoleAppender -log4j.appender.out.layout=org.apache.log4j.PatternLayout -log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c][%t] %m%n diff --git a/src/test/resources/log4j2.xml b/src/test/resources/log4j2.xml new file mode 100644 index 0000000..f71aced --- /dev/null +++ b/src/test/resources/log4j2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/base64-decoded.txt b/src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/base64-decoded.txt similarity index 100% rename from src/test/resources/base64-decoded.txt rename to src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/base64-decoded.txt diff --git a/src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/base64-mapping.json b/src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/base64-mapping.json new file mode 100644 index 0000000..e55480d --- /dev/null +++ b/src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/base64-mapping.json @@ -0,0 +1,10 @@ +{ + "someType" : { + "properties" : { + "someField": { + "type" : "langdetect", + "binary" : true + } + } + } +} \ No newline at end of file diff --git a/src/test/resources/base64.txt b/src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/base64.txt similarity index 100% rename from src/test/resources/base64.txt rename to src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/base64.txt diff --git a/src/test/resources/chinese.txt b/src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/chinese.txt similarity index 100% rename from src/test/resources/chinese.txt rename to src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/chinese.txt diff --git a/src/test/resources/english.txt b/src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/english.txt similarity index 100% rename from src/test/resources/english.txt rename to src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/english.txt diff --git a/src/test/resources/japanese.txt b/src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/japanese.txt similarity index 100% rename from src/test/resources/japanese.txt rename to src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/japanese.txt diff --git a/src/test/resources/korean.txt b/src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/korean.txt similarity index 100% rename from src/test/resources/korean.txt rename to src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/korean.txt diff --git a/src/test/resources/simple-mapping.json b/src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/simple-mapping.json similarity index 100% rename from src/test/resources/simple-mapping.json rename to src/test/resources/org/xbib/elasticsearch/index/mapper/langdetect/simple-mapping.json