From 2496d51bfeeabc156b80ed7439c44db1ac452ec9 Mon Sep 17 00:00:00 2001 From: Kazuma Arimura Date: Sat, 7 Dec 2024 02:00:03 +0000 Subject: [PATCH] add documentation for kuromoji_completion (#117808) --- docs/plugins/analysis-kuromoji.asciidoc | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/plugins/analysis-kuromoji.asciidoc b/docs/plugins/analysis-kuromoji.asciidoc index 0a167bf3f0240..217d88f361223 100644 --- a/docs/plugins/analysis-kuromoji.asciidoc +++ b/docs/plugins/analysis-kuromoji.asciidoc @@ -750,3 +750,39 @@ Which results in: ] } -------------------------------------------------- + +[[analysis-kuromoji-completion]] +==== `kuromoji_completion` token filter + +The `kuromoji_completion` token filter emits romanized forms of Japanese tokens alongside the original tokens (surface forms). + +[source,console] +-------------------------------------------------- +GET _analyze +{ + "analyzer": "kuromoji_completion", + "text": "寿司" <1> +} +-------------------------------------------------- + +<1> Returns `寿司`, `susi` (Kunrei-shiki) and `sushi` (Hepburn-shiki). + +The `kuromoji_completion` token filter accepts the following settings: + +`mode`:: ++ +-- + +The romanization mode determines how the token filter romanizes tokens, +depending on whether it is used for indexing or querying. It can be set to: + +`index`:: + + Simple romanization. Expected to be used when indexing. + +`query`:: + + Input Method aware romanization. Expected to be used when querying. + +Defaults to `index`. +--