Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'trimmer-jp' #1

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 34 additions & 11 deletions demos/demo-multi.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
<script src="lib/lunr.js"></script>
<script src="../lunr.stemmer.support.js"></script>
<script src="../lunr.ru.js"></script>
<script src="../tinyseg.js"></script>
<script src="../lunr.jp.js"></script>
<script src="../lunr.multi.js"></script>
</head>
<body>
Expand All @@ -16,32 +18,53 @@
this.field('body')
});

// Index configured with the Japanese plugin (lunr.jp), indexing the 'body' field.
var idxJp = lunr(function () {
this.use(lunr.jp);
this.field('body')
});

// Index configured with the Russian plugin (lunr.ru), indexing the 'body' field.
var idxRu = lunr(function () {
this.use(lunr.ru);
this.field('body')
});

// Index configured through lunr.multiLanguage, indexing the 'body' field.
// NOTE(review): two this.use(...) calls appear below, one for ('en', 'ru') and
// one for ('en', 'jp', 'ru'). This looks like the old and new side of the same
// change shown together; confirm that only the three-language call should stay.
var idxMulti = lunr(function () {
this.use(lunr.multiLanguage('en', 'ru'));
this.use(lunr.multiLanguage('en', 'jp', 'ru'));
this.field('body')
});

// Document id 1: Russian-language body text, added to the English, Russian and
// multi-language indexes (it is not added to idxJp).
// NOTE(review): the inline add(...) calls and the doc1-based add(...) calls
// below insert the same id into the same indexes; presumably only the
// doc1-based form is meant to remain. Confirm.
idxEn.add({"body": "Этот текст написан на русском.", "id": 1})
idxRu.add({"body": "Этот текст написан на русском.", "id": 1})
idxMulti.add({"body": "Этот текст написан на русском.", "id": 1})
// Shared object form, so every index receives the identical document.
var doc1 = {"body": "Этот текст написан на русском.", "id": 1}
idxEn.add(doc1)
idxRu.add(doc1)
idxMulti.add(doc1)

// Document id 2: English-language body text.
// NOTE(review): as with document 1, the inline add(...) calls and the
// doc2-based calls insert the same id twice; presumably only the doc2-based
// form is meant to remain. Confirm.
idxEn.add({"body": "This text is written in the English language.", "id": 2})
idxRu.add({"body": "This text is written in the English language.", "id": 2})
idxMulti.add({"body": "This text is written in the English language.", "id": 2})
// Shared object form; unlike document 1, this one is also added to idxJp.
var doc2 = {"body": "This text is written in the English language.", "id": 2}
idxEn.add(doc2)
idxJp.add(doc2)
idxRu.add(doc2)
idxMulti.add(doc2)

// NOTE(review): this search runs before document 3 is added and uses a query
// ('Русских') that none of the later log lines for idxEn repeat; it may be a
// leftover from the previous version of the demo. Confirm.
console.log('Search for `Русских` (English pipeline): ', idxEn.search('Русских'));
// Document id 3: Japanese-language body text, added to all four indexes.
var doc3 = {"body": "このテキストは日本語で書かれています。", "id": 3};
idxEn.add(doc3)
idxJp.add(doc3)
idxRu.add(doc3)
idxMulti.add(doc3)

// Run the same three queries (Russian, Japanese, English) against each index
// and log the raw search results so the pipelines can be compared by eye.

// English-only index.
console.log('Search for `Этот текст` (English pipeline): ', idxEn.search('Этот текст'));
console.log('Search for `日本語` (English pipeline): ', idxEn.search('日本語'));
console.log('Search for `languages` (English pipeline): ', idxEn.search('languages'));

// Russian-only index.
// NOTE(review): the 'Русских' query appears only for this index (and in the
// 'Ru + En' lines below); the other indexes use 'Этот текст'. Confirm whether
// this line should remain.
console.log('Search for `Русских` (Russian pipeline): ', idxRu.search('Русских'));
console.log('Search for `Этот текст` (Russian pipeline): ', idxRu.search('Этот текст'));
console.log('Search for `日本語` (Russian pipeline): ', idxRu.search('日本語'));
console.log('Search for `languages` (Russian pipeline): ', idxRu.search('languages'));

// NOTE(review): these two lines label idxMulti as 'Ru + En', while the lines
// further down label the same index 'Ru + Jp + En'; presumably the 'Ru + En'
// lines are the previous wording. Confirm.
console.log('Search for `Русских` (Ru + En pipeline): ', idxMulti.search('Русских'));
console.log('Search for `languages` (Ru + En pipeline): ', idxMulti.search('languages'));
// Japanese-only index.
console.log('Search for `Этот текст` (Japanese pipeline): ', idxJp.search('Этот текст'));
console.log('Search for `日本語` (Japanese pipeline): ', idxJp.search('日本語'));
console.log('Search for `languages` (Japanese pipeline): ', idxJp.search('languages'));

// Multi-language index.
console.log('Search for `Этот текст` (Ru + Jp + En pipeline): ', idxMulti.search('Этот текст'));
console.log('Search for `日本語` (Ru + Jp + En pipeline): ', idxMulti.search('日本語'));
console.log('Search for `languages` (Ru + Jp + En pipeline): ', idxMulti.search('languages'));

</script>
</body>
Expand Down
4 changes: 4 additions & 0 deletions lunr.jp.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@
lunr.jp.stopWordFilter.stopWords = new lunr.SortedSet();
lunr.jp.stopWordFilter.stopWords.length = 45;

// Characters that count as part of a word for the Japanese trimmer, written as
// the inside of a regular-expression character class:
//   - kanji (a few numerals listed explicitly, then the range 一-龠) plus 々 〆 ヵ ヶ
//   - hiragana ぁ-ん, katakana ァ-ヴ and the long-vowel mark ー
//   - half-width katakana U+FF71-U+FF9D and the half-width voiced mark U+FF9E
//   - Latin letters and digits, in both ASCII and full-width forms
//     (U+FF41-U+FF5A, U+FF21-U+FF3A, U+FF10-U+FF19)
// The half-width and full-width ranges are spelled as \u escapes on purpose:
// if the file is ever passed through Unicode compatibility normalisation, the
// literal characters collapse into their ordinary forms, which leaves the
// ASCII ranges duplicated and silently drops full-width/half-width coverage.
lunr.jp['wordCharacters'] = '一二三四五六七八九十百千万億兆一-龠々〆ヵヶぁ-んァ-ヴー\uFF71-\uFF9D\uFF9Ea-zA-Z\uFF41-\uFF5A\uFF21-\uFF3A0-9\uFF10-\uFF19';
// Generate the trimmer from that character set and register it with the
// pipeline under the label 'trimmer-jp'.
lunr.jp.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.jp.wordCharacters);
lunr.Pipeline.registerFunction(lunr.jp.trimmer, 'trimmer-jp');

// The space at the beginning is crucial: It marks the empty string
// as a stop word. lunr.js crashes during search when documents
// processed by the pipeline still contain the empty string.
Expand Down