Skip to content
This repository has been archived by the owner on Dec 18, 2021. It is now read-only.

Commit

Permalink
adding short text profiles
Browse files (browse the repository at this point in the history)
  • Loading branch information
jprante committed Nov 20, 2014
1 parent 73221c6 commit 91e1b4b
Show file tree
Hide file tree
Showing 49 changed files with 57 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

<groupId>org.xbib.elasticsearch.plugin</groupId>
<artifactId>elasticsearch-langdetect</artifactId>
<version>1.4.0.0</version>
<version>1.4.0.1</version>

<packaging>jar</packaging>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ public class LangdetectService extends AbstractLifecycleComponent<LangdetectServ

private Map<String,String> langmap = new HashMap<String,String>();

private String profile;

private double alpha;

private double alpha_width;
Expand All @@ -112,6 +114,7 @@ public LangdetectService(Settings settings) {

@Override
protected void doStart() throws ElasticsearchException {
this.profile = settings.get("profile", "/langdetect/");
load(settings);
this.priorMap = null;
this.n_trial = settings.getAsInt("number_of_trials", 7);
Expand Down Expand Up @@ -147,7 +150,7 @@ private void load(Settings settings) {
int size = keys.length;
for (String key : keys) {
if (key != null && !key.isEmpty()) {
loadProfileFromResource(key, index++, size);
loadProfileFromResource(key, profile, index++, size);
}
}
logger.debug("language detection service installed for {}", langlist);
Expand All @@ -163,7 +166,7 @@ private void load(Settings settings) {
}
if (map.getAsMap().isEmpty()) {
// is in "map" a resource name?
String s = settings.get("map") != null ? settings.get("map") : "/langdetect/language.json";
String s = settings.get("map") != null ? settings.get("map") : profile + "language.json";
InputStream in = getClass().getResourceAsStream(s);
if (in != null) {
map = ImmutableSettings.settingsBuilder().loadFromStream(s, in).build();
Expand All @@ -176,14 +179,14 @@ private void load(Settings settings) {
}
}

public void loadProfileFromResource(String resource, int index, int langsize) throws IOException {
InputStream in = getClass().getResourceAsStream("/langdetect/" + resource);
public void loadProfileFromResource(String resource, String profile, int index, int langsize) throws IOException {
InputStream in = getClass().getResourceAsStream(profile + resource);
if (in == null) {
throw new IOException("profile '" + resource + "' not found");
}
ObjectMapper mapper = new ObjectMapper();
LangProfile profile = mapper.readValue(in, LangProfile.class);
addProfile(profile, index, langsize);
LangProfile langProfile = mapper.readValue(in, LangProfile.class);
addProfile(langProfile, index, langsize);
}

public void addProfile(LangProfile profile, int index, int langsize) throws IOException {
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/ar

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/bg

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/bn

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/ca

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/cs

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/da

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/de

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/el

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/en

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/es

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/et

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/fa

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/fi

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/fr

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/gu

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/he

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/hi

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/hr

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/hu

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/id

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/it

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/ja

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/ko
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"freq":{"¤":8,"¸":6,"·":50,"´":30," 가ㅏ":15," 가ㅎ":12," 가ㅈ":7," 가ㅋ":98," 가ㅅ":7," 가ㅇ":26," 가ㅁ":13," 가ㅜ":11," 가ㅐ":7," 가ㅓ":10," 가ㄹ":31," 가ㄴ":29," 가ㄱ":20,"ㄱㄱ ":11," 가ㅣ":10," 가ㅠ":20," 가ㅡ":9,"ˇ":6,"˝":6,"ㅅ가가":25,"ω":19,"가가ㄹ":54,"가가ㄱ":37,"가가ㄴ":92,"가가ㄷ":32,"가가ㅡ":50,"가가ㅠ":600,"가가ㅣ":46,"가가ㅜ":179,"가가ㅗ":8,"가가ㅔ":17,"가가ㅓ":33,"가가ㅐ":12,"가가ㅑ":7,"가가ㅏ":68,"가가ㅎ":104,"가가ㅌ":12,"가가ㅋ":1102,"가가ㅈ":10,"가가ㅇ":106," ㄹ ":6,"가가ㅆ":10,"가가ㅅ":29,"가가ㅂ":6,"가가ㅁ":20," ´":22," ·":8,"가가」":32,"가가「":31,"가가』":11,"가가】":7," ㅁ ":63," ㅂ ":19," ㅇ ":81,"ㅡㅡㅡ":10," ㅅ ":66," ㄱ ":11," ㄷ ":9," ω":10," ㄴ ":6,"ㅈ가가":16,"ㅠㅜㅠ":47,"ㅠㅜㅜ":54,"ㅠㅠㅋ":9,"ㅠㅠㅜ":47,"ㅠㅠㅠ":2506,"ㅠㅡㅠ":12,"가/가":6,"ㅇ가가":82,"๑":12,"ㅠㅋㅋ":8,"า":6,"ᆢ":11,"ᆞ":18," "":8," ノ":19," 。":6,"  ̄":6,"가가▶":13,"ㅇㅇ가":12," ๑":9,"ㅁ가가":17,"´ ":8,"】가":7,"가가→":10,"www":10,"「가":83,"【가":7,"」가":33,"『가":17,"· ":6," ㅠ ":124," ㅡ ":19," ㅜ ":20," ㅍ ":10," ㅎ ":47," ㅋ ":92,"ㅂ가가":11,"ㅋㅋ가":741,"∇":9,"∀":10,"가ㅣ ":7,"ㄹㄹ ":7,"⊙":14,"가ㅜ ":14,"⌒":10,"가ㅠ ":57," 」 ":29," 「 ":8,"가ㅓ ":12,"ノ ":9,"가ㅋ ":48,"가ㅐ ":6,"↑":19,"→":21,"↗":6,"가ㅎ ":19,"가ㅏ ":16," ⊙":7,"가ㅇ ":18," ⌒":8,"가ㄹ ":19,"ㅋㄱ가":10,"가ㅁ ":11,"가ㄱ ":12,"가ㄴ ":20,"가가ᆢ":6,"가가ᆞ":11,"┌":6,"─":14," →":8,"▶":46,"▷":7,"▽":21,"□":11,"◐":11,"◑":9,"◇":9,"●":18,"○":9,"ㄷㄷ ":24,"ww":11," ▽":10," ▶":25," ▷":7," ●":13,"ㅠ가ㅠ":9," 丈가":9,"가·가":38,"▶▶ ":6,"ああ ":6," ̄ ":6," 【":11," 『":14," 「":59," 」":64,"  ":6,"ㅣ":106,"ㅡ":253,"ㅠ":5116,"ㅔ":42,"ㅕ":20,"ㅗ":65,"ㅐ":36,"ㄹ가가":56,"ㅑ":14,"ㅓ":75,"ㅜ":1064,"ㅛ":9,"ㅅ":213,"ㅇ":635,"ㅆ":17,"ㅁ":191,"ㅃ":11,"ㅂ":77,"ㅍ":30,"ㅌ":55,"ㅏ":203,"ㅎ":887,"ㅉ":19,"ㅈ":58,"ㅋ":18985,"ㅊ":18,"ㄲ":26,"ㄱ":238,"ㄷ":173,"ㄴ":225,"ㄸ":15,"ㄹ":159," ア ":15,"あ":51,"? 
":6," 가 ":12034," ":15,"』":15,"『":17,"」":105,"「":104,"【":14,"】":14,"" ":8,"〜":7,"ア":38,"─ ":6,"ㄴㄴ ":8,"ㅎㅎ가":30," ㅜ":143," ㅗ":7," ㅋ":1426," ㅉ":6," ㅈ":23," ㅏ":9," ㅎ":263," ㅍ":18," ㅌ":10," ㅂ":30," ㅁ":82," ㅇ":266," ㅅ":95," ㄸ":11," ㄹ":15," ㄱ":36," ㄲ":7," ㄴ":38," ㄷ":40,"가가 ":101467," ㅡ":74," ㅠ":709," ㅣ":10,"가가·":32," あ":16," ア":23,"」가 ":10," あ ":11,"人人人":6,"` ":9,"「가 ":16,"ㄱ가 ":13," ̄ ̄":8,"。 ":6,"ㅣㅣ":7,"ㅣㅇ":6,"ㅡㅡ":84,"ㅡㅠ":13,"ㅡㅜ":11,"ㅠㅠ":3579,"ㅠㅡ":17,"/가가":6,"ㅕㅕ":7,"ㅗㅗ":38,"ㅓㅓ":14,"ㅜㅜ":559,"ㅜㅡ":11,"ㅜㅠ":119,"ㅠㅋ":9,"ㅠㅜ":132,"ㅅㅎ":10,"ㅅㅇ":34,"ㅅㅂ":18,"ㅇㅠ":6,"ㅈㄹ":6,"ㅇㅏ":8,"ㅇㅇ":75,"ㅇㅎ":8,"ㅇㅋ":9,"ㅇㅁ":52,"ㅇㅂ":10,"ㅇㅅ":42,"ㅁㅇ":49,"ㅁㅁ":7,"ㅂㅇ":10,"ㅎㅇ":13,"ㅎㅅ":10,"ㄴ가가":75," "가":7,"ㅌㅌ":9,"ㅏㅏ":52,"ㅎㅎ":468,"ㅉㅉ":8,"ㅁㅇ ":40,"ㅋㅋ":16188,"ㅋㅌ":14,"ㅋㅠ":7,"ㅌㅋ":25,"ㅋㄱ":75,"ㅋㄲ":6,"ㄱㄱ":49,"ㄱㅋ":55,"ㄷㄷ":78,"ㄴㄴ":29,"/가":6,"ㄸㄹ":12,"ㄹㄹ":17,"ㄱㅋㅋ":32,"ㄱㅋㄱ":12,"之":27,"中":11,"並":16,"丘":15,"丈":12,"三":53,"丁":17,"人":10,"亞":8,"亂":8,"ㅣ가 ":13,""가":7,"ㄷ가가":6,"三三":7," ▽ ":10,"가ㄷㄷ":10,"가ㄱㄱ":8," ▶ ":12," 丘":7,"가ㅇㅇ":15," 丈":12," 三":20," 丁":7,"가ㅇㅁ":8,"가ㅇㅅ":10,"ㄱㄱㅋ":9,"ㄱㄱㄱ":15,"가ㅏㅏ":19,"가ㅎㅎ":69,"가ㅌㅋ":6,"가ㅋㅋ":1130," ● ":9,"가ㅓㅓ":6,"가ㅜㅜ":118,"가ㅜㅠ":23,"가ㅠㅜ":32," 中":10," 之":13,"가ㅡㅡ":32,"가ㅠㅡ":6,"가ㅠㅠ":506,"人人":7,"→ ":9,"↑ ":6,"ㄱ가가":41,"ㅠ가 ":26," ㅠ가":9," ㅡ가":7," ㅎ가":8," ㅋ가":10," ㅈ가":7," ㅇ가":23," ㅅ가":7,"ああ":27,"▶가":7," ㅁ가":6," ㄴ가":8,"ㅋㄱ ":6," ㄱ가":6,"ㅇㅇ ":37,"▶가가":7,"」「":6,"ㅏ가ㅏ":9,"ㅜ가 ":7,"ㅔ가 ":8,""가가":6,"ᆞ가가":11,"ㄸㄹㄹ":9,"ω ":11,"ㅅㅂ ":9,"ㅅㅇ ":21,"⌒ ":8,"ㅠㅜ가":9,"ㅂㅇ ":7,"↑↑":8,"ㅜㅜ가":53,"가ㄹ":87,"가ㄷ":36,"가ㄴ":124,"가ㄲ":6,"가ㄱ":59,"가ㅛ":6,"가ㅜ":196,"가ㅑ":9,"가ㅐ":19,"가ㅓ":48,"가ㅕ":6,"가ㅔ":24,"가ㅗ":10,"가ㅈ":18,"가ㅋ":1235,"가ㅌ":14,"가ㅍ":6,"가ㅎ":120,"가ㅏ":101,"가ㅁ":34,"가ㅂ":8,"가ㅅ":40,"가ㅆ":11,"가ㅇ":137,"가ㅣ":62,"가ㅠ":655,"가ㅡ":61,"ㄷㄷㄷ":37,"가』":14,"가「":35,"가」":34,"ㅋ가ㅋ":25," 之 ":8,"가】":7,"ㅋ가ㅠ":14,"ㅅ가 ":16,"가ᆞ":12,"가ᆢ":8,"가가가":150443,"ㅇ가 ":33,"】 ":6,"』 ":10,"」 ":44,"「 ":14,"ㅋㅋ ":955,"ㅎㅎ ":124,"👍":18,"ㅠㅠ가":224," 가가":104979,"あ ":18,"ㅡㅡ가":9,"가→":11,"→가가":7,"ㅋ가 ":92,"ㅏㅏ ":10," 『가":14," 「가":48," 」가":15,"ㅈ가 ":7,"ㅣ가가":37,"·가가":33,"가─":7,"가▶":13," ノ ":9,"ㄷ 
":39,"ㅡ가가":28,"ㄴ ":40,"ㄲ ":8,"ㄱ ":44,"ㅁ ":79,"ㄹ ":42,"ㅈ ":12,"ㅇㅇㅇ":10,"๑ ":8,"ㅇㅅㅇ":32,"ㅇ ":221,"ㅅ ":79,"ㅂ ":31,"가" ":8," 가":117510,"ㅇㅁㅇ":47,"ㅏ가 ":14,"가/":6,"가"":8,"ㅇㅂㅇ":10,"가ㅡ가":11,"가ㅠ가":26,"가ㅣ가":40,"ㅠ가가":227,"가ㅜ가":14,"가ㅓ가":15,"가ㅔ가":10,"가ㅏ가":51,"가ㅐ가":9,"가ㅋ가":23,"가":378431,"가ㅎ가":16,"三가":7,"丈가":9,"ㅋ ":1110,"ㅍ ":13,"ㅏ ":40,"ㅎ ":201,"丈가가":8,"ㅐ ":10,"ㅓ ":17,"ㅔ ":10,"ㅗ ":8,"ㅜ ":141,"ㅠ ":694,"ㅡ ":73,"ㅣ ":13,"ㅋㅠㅠ":6,"가·":39,"가 ":114163,"ㅌㅋㅋ":19,"ㅋㅌㅋ":11,"ㅋㅋㅠ":6,"ㅓ가 ":8,"ㅋㅋㄲ":6,"ㅋㅋㄱ":50,"ㅋㅋㅋ":13704,"ㅋㅋㅌ":9,"ア ":21,"ㅋㄱㄱ":18,"ㅋㄱㅋ":36,"ㅜ가가":72,"👍 ":6,"ㅎㅇㅎ":6,"가ㅁ가":17,"ㅎㅅㅎ":9,"가ㅂ가":7,"가ㅅ가":32,"ㅎㅎㅎ":232,"가ㅇ가":65,"가ㅈ가":13,"가ㄱ가":30,"가ㄴ가":90,"가ㄹ가":63,"가ㄷ가":13,"ㅜ가":86,"ㅣ가":59,"ㅠ가":267,"ㅡ가":33,"ㅔ가":19,"ㅓ가":21,"ㅏㅏㅏ":32,"ㅎ가":57," ㅡㅡ":39,"ㅍ가":7," ㅠㅠ":504," ㅠㅡ":8,"ㅌ가":11,"ㅋ가":782," ㅠㅜ":15,"ㅑ가":6,"ㅐ가":14,"ㅏ가":79,"ㅅ가":49,"ㅆ가":8,"ㅇ가":122,"ㅜㅠ ":18,"ㅈ가":24," ㅜㅜ":90," ㅜㅠ":12,"ㅜㅜ ":88,"ㅁ가":27,"ㅂ가":15," ㅇㅏ":6," ㅇㅇ":48," ㅋㄱ":6,"ㄴ가":131,"ㄷ가":20," ㅋㅋ":1269," ㅎㅎ":164,"ㄹ가":82," ㅎㅅ":8," ㅎㅇ":7,"가ᆞ가":8," ㅅㅂ":13,"ㄴ가 ":46,"ㄱ가":57," ㅇㅁ":43," ㅇㅂ":9," ㅇㅅ":30," → ":7,"·가":40,"?":18," ?":17,"▶ ":18,"ㅓㅓㅓ":8,"▽ ":12,"◇ ":6,"ㅠㅜ ":11,"ㅓ가가":9,"?":8,"?":38,"● ":13,"ㅠㅠ ":476,"ㄴㄴ가":14,"가가/":6,"가가"":8,"가👍":12,"ㄹ가 ":17,"ㅡㅠ ":9,"ㅔ가가":8,"ㅡㅡ ":47,"가」가":18,"가「가":27,"?가":9,"가?":15,"ㄷ가 ":12,"👍가":9,"「가가":66,"ㅏ가가":50,"」가가":19,"ㅗㅗㅗ":29," ω ":9," ▶가":7,"『가가":16,"ㅐ가가":6,"ᆞ가":12,"?가가":9,"【가가":7,"ㅎ가가":45,"】가가":6,"丘 ":9,"가가?":15,"가→가":7,""":16,")":8,"(":8,"/":13,"^":10,"_":8,"`":11,"w":13,"・":8,"。":8,"ノ":19,"゚":8," ̄":20,"가가":257747,"ㅋ가가":638,"ㅜㅠㅠ":44,"ㅜㅠㅜ":34,"▶▶":6,"ㅜㅡㅜ":6," ⌒ ":8,"ㅁ가 ":9,"□□":7,"三 ":15,"가』 ":10,"가」 ":11,"ㅜㅜㅡ":6,"ㅜㅜㅠ":32,"ㅜㅜㅜ":289,"丁 ":7,"ㅌ가가":8," ㄴㄴ":18," ㄱㄱ":7," ㄸㄹ":10," ㄷㄷ":27,"→가":7,"あああ":17,"之 ":11},"n_words":[408659,524156,400873],"name":"ko"}
1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/lt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/lv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/mk

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/ml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/nl

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/no

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/pa

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/pl

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/pt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/ro

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/ru

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/si

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/sq

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/sv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/ta

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/te

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/th

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/tl

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/tr

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/uk

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/ur

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/vi

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/zh-cn

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/main/resources/langdetect/short-text/zh-tw

Large diffs are not rendered by default.

0 comments on commit 91e1b4b

Please sign in to comment.