From 30ca0cca07eabf0030105b1ae8e4a422f216dd95 Mon Sep 17 00:00:00 2001 From: khumnath Date: Wed, 15 Nov 2023 21:28:16 +0900 Subject: [PATCH 1/2] added and modified some characters for language ne --- schemes/ne/ne.scheme | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/schemes/ne/ne.scheme b/schemes/ne/ne.scheme index 3520cdd..5fedb70 100644 --- a/schemes/ne/ne.scheme +++ b/schemes/ne/ne.scheme @@ -14,9 +14,12 @@ author "Rohit Bansal" ignore_duplicates false $virama = "\u{094D}" -$am = "\u{0902}" -$an = "\u{0901}" +$am = "\u{0902}" #not working as expected +$an = "\u{0901}" #not working as expected $nukta = "\u{093C}" +$chandrabindu = "ँ" +$anusvara = "ं" +$visarga ="ः" virama "~" => "\u{094D}" @@ -59,7 +62,8 @@ consonants "k" => "क", "ng" => "ङ", "ch" => "च", - ["CH", ["ch"]] => "छ", + "chh" => "छ", + ["Ch", ["ch"]] => "छ", "j" => "ज", ["z", ["j"]] => ["\u{095B}", "ज#{$nukta}"], ["jh", ["j"]] => "झ", @@ -149,7 +153,8 @@ consonants "kShN" => "क्ष्ण", #tikShN "spr" => "स्प्र", "smr" => "स्म्र", "Shtr" => "ष्त्र", - "Shthy" => "ष्थ्य" + "Shthy" => "ष्थ्य", + "gny" => "ज्ञ" ignore_duplicates true can_make_cluster.each do |c1| @@ -186,7 +191,10 @@ end symbols [["n", "m"]] => $an, ["om"] => "ॐ", ["?"] => "ॽ", - ["|"] => "।" + ["|"] => "।", + ["*"] => $anusvara, + ["**"] => $chandrabindu, + ["H"] => $visarga symbols({:priority => :low}, ["aan", "aam"] => $an) From 0c8d325c16939331c6573e995b70b1a382642075 Mon Sep 17 00:00:00 2001 From: khumnath Date: Thu, 7 Dec 2023 08:16:14 +0900 Subject: [PATCH 2/2] added Nepali unit test files --- test/ne.rb | 34 ++++++++++++++++++++++++++++++++++ test/run.rb | 1 + 2 files changed, 35 insertions(+) create mode 100644 test/ne.rb diff --git a/test/ne.rb b/test/ne.rb new file mode 100644 index 0000000..ba6a67c --- /dev/null +++ b/test/ne.rb @@ -0,0 +1,34 @@ +# encoding: utf-8 + +class TestNe < Minitest::Test + def setup + @varnam = get_varnam_handle('ne') 
@varnam.config(Varnam::VARNAM_CONFIG_SET_TOKENIZER_SUGGESTIONS_LIMIT, 30) + end + + def test_words + list = { + 'kitaab' => 'किताब', + 'pitaa' => 'पिता', + 'prastuti' => 'प्रस्तुति', + 'kaaThamaanDau' => 'काठमान्डौ', + # not working vowel+vowel combinations + 'euTaa' => 'एउटा', + 'aaimaai' => 'आइमाइ' + } + list.each do |pattern, expected| + # TODO assert length of result array too + assert_equal expected, @varnam.transliterate(pattern)[0].Word + end + end + + def test_reverse_transliteration + list = { + 'नेपाली' => %w[nepaalee nepaalI nepAlee nepaalii nepali] + } + + list.each do |word, expected| + assert_equal expected, @varnam.reverse_transliterate(word) + end + end +end diff --git a/test/run.rb b/test/run.rb index 8b1c457..a60f7ec 100755 --- a/test/run.rb +++ b/test/run.rb @@ -25,3 +25,4 @@ def get_varnam_handle(scheme_id) require "minitest/autorun" require_relative './ml' require_relative './hi' +require_relative './ne'