From 0c0d8638f3282ff978c5732143f6f4d6a8e77246 Mon Sep 17 00:00:00 2001 From: e_ntyo Date: Sat, 27 Apr 2019 19:44:18 +0900 Subject: [PATCH] Add 'trimmer-jp' --- demos/demo-multi.html | 45 ++++++++++++++++++++++++++++++++----------- lunr.jp.js | 4 ++++ 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/demos/demo-multi.html b/demos/demo-multi.html index cc171cd..a3c0518 100644 --- a/demos/demo-multi.html +++ b/demos/demo-multi.html @@ -6,6 +6,8 @@ + + @@ -16,32 +18,53 @@ this.field('body') }); + var idxJp = lunr(function () { + this.use(lunr.jp); + this.field('body') + }); + var idxRu = lunr(function () { this.use(lunr.ru); this.field('body') }); var idxMulti = lunr(function () { - this.use(lunr.multiLanguage('en', 'ru')); + this.use(lunr.multiLanguage('en', 'jp', 'ru')); this.field('body') }); - idxEn.add({"body": "Этот текст написан на русском.", "id": 1}) - idxRu.add({"body": "Этот текст написан на русском.", "id": 1}) - idxMulti.add({"body": "Этот текст написан на русском.", "id": 1}) + var doc1 = {"body": "Этот текст написан на русском.", "id": 1} + idxEn.add(doc1) + idxRu.add(doc1) + idxMulti.add(doc1) - idxEn.add({"body": "This text is written in the English language.", "id": 2}) - idxRu.add({"body": "This text is written in the English language.", "id": 2}) - idxMulti.add({"body": "This text is written in the English language.", "id": 2}) + var doc2 = {"body": "This text is written in the English language.", "id": 2} + idxEn.add(doc2) + idxJp.add(doc2) + idxRu.add(doc2) + idxMulti.add(doc2) - console.log('Search for `Русских` (English pipeline): ', idxEn.search('Русских')); + var doc3 = {"body": "このテキストは日本語で書かれています。", "id": 3}; + idxEn.add(doc3) + idxJp.add(doc3) + idxRu.add(doc3) + idxMulti.add(doc3) + + console.log('Search for `Этот текст` (English pipeline): ', idxEn.search('Этот текст')); + console.log('Search for `日本語` (English pipeline): ', idxEn.search('日本語')); console.log('Search for `languages` (English pipeline): ', 
idxEn.search('languages')); - console.log('Search for `Русских` (Russian pipeline): ', idxRu.search('Русских')); + console.log('Search for `Этот текст` (Russian pipeline): ', idxRu.search('Этот текст')); + console.log('Search for `日本語` (Russian pipeline): ', idxRu.search('日本語')); console.log('Search for `languages` (Russian pipeline): ', idxRu.search('languages')); - console.log('Search for `Русских` (Ru + En pipeline): ', idxMulti.search('Русских')); - console.log('Search for `languages` (Ru + En pipeline): ', idxMulti.search('languages')); + console.log('Search for `Этот текст` (Japanese pipeline): ', idxJp.search('Этот текст')); + console.log('Search for `日本語` (Japanese pipeline): ', idxJp.search('日本語')); + console.log('Search for `languages` (Japanese pipeline): ', idxJp.search('languages')); + + console.log('Search for `Этот текст` (Ru + Jp + En pipeline): ', idxMulti.search('Этот текст')); + console.log('Search for `日本語` (Ru + Jp + En pipeline): ', idxMulti.search('日本語')); + console.log('Search for `languages` (Ru + Jp + En pipeline): ', idxMulti.search('languages')); diff --git a/lunr.jp.js b/lunr.jp.js index 4dcdc43..f29f928 100644 --- a/lunr.jp.js +++ b/lunr.jp.js @@ -108,6 +108,10 @@ lunr.jp.stopWordFilter.stopWords = new lunr.SortedSet(); lunr.jp.stopWordFilter.stopWords.length = 45; + lunr.jp['wordCharacters'] = '一二三四五六七八九十百千万億兆一-龠々〆ヵヶぁ-んァ-ヴーｱ-ﾝﾞa-zA-Zａ-ｚＡ-Ｚ0-9０-９'; + lunr.jp.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.jp.wordCharacters); + lunr.Pipeline.registerFunction(lunr.jp.trimmer, 'trimmer-jp'); + // The space at the beginning is crucial: It marks the empty string // as a stop word. lunr.js crashes during search when documents // processed by the pipeline still contain the empty string.