forked from smc/mlmorph
-
Notifications
You must be signed in to change notification settings - Fork 0
/
noun.fst
72 lines (56 loc) · 3.06 KB
/
noun.fst
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include "symbols.fst"
% Precompiled transducers used by the noun grammar.
% Each "<path.a>" operand reads an already compiled automaton from disk,
% so the referenced .a files have to exist before this file is compiled.

% Word-class lexicons.
$NSTEM$ = "<lexicon/nouns.a>"
$SANSKRIT$ = "<lexicon/nouns-sanskrit.a>"
$PRONOUN$ = "<lexicon/pronouns.a>"
$PROPERNOUN$ = "<lexicon/proper-nouns.a>"
$CONJUNCTION$ = "<lexicon/conjunctions.a>"
$QUANTIFIERS$ = "<lexicon/quantifiers.a>"
$POSTPOSITIONS$ = "<lexicon/postpositions.a>"
$INDECLINABLE$ = "<lexicon/indeclinables.a>"
$ABBREV$ = "<lexicon/abbreviations.a>"
$DEM$ = "<lexicon/demonstratives.a>"
% NOTE(review): $VSTEM$ is not referenced anywhere else in this file.
$VSTEM$ = "<lexicon/verbs.a>"
$BORROWED$ = "<lexicon/borrowed-words.a>"
% Noun inflection transducers: $PLURAL$ is composed onto <pl>-tagged nouns and
% $NINFL$ onto the complete noun expression further down in this file.
$PLURAL$ = "<ninfl/plural.a>"
$NINFL$ = "<ninfl/ninfl.a>"
$ADJECTIVE$ = "<lexicon/adjectives.a>"
$POLARITY$ = "<lexicon/polarity.a>"
% Complete sub-grammars compiled from their own source files.
$NUMBER$ = "<num.a>"
$VERB$ = "<verb.a>"
$VERB_ADVERBS$ = "<verb-adverbs.a>"
% Derived nouns.
% Every derivation path is built as its own transducer: the source word is
% tagged <n><deriv> and the result is composed with the matching rule set.
% The three paths are then unioned into $DERIVEDNOUNS$.
$NOUNFROMVERB$ = "<deriv/noun-from-verb.a>"
$NOUNFROMNOUN$ = "<deriv/noun-from-noun.a>"
$NOUNFROMADVERB$ = "<deriv/noun-from-adverb.a>"

% Path 1: a verb form followed by <n><deriv>.
$DERIVED_FROM_VERB$ = ( $VERB$ <n> <deriv> ) || $NOUNFROMVERB$

% Path 2: one or more verb-adverb forms followed by <n><deriv>.
$DERIVED_FROM_ADVERB$ = ( $VERB_ADVERBS$+ <n> <deriv> ) || $NOUNFROMADVERB$

% Path 3: a noun stem, exactly one gender tag, then <n><deriv>.
$DERIVED_FROM_NOUN$ = ( $NSTEM$ [<masculine><feminine><neutral>] <n> <deriv> ) || $NOUNFROMNOUN$

$DERIVEDNOUNS$ = $DERIVED_FROM_VERB$ | $DERIVED_FROM_ADVERB$ | $DERIVED_FROM_NOUN$
% Ordinal marker: maps the empty string on the left-hand side to one of four
% suffix spellings on the right-hand side, followed by the <ordinal> tag.
$ordinal_forms$= ({}:{ആം}|{}:{ആമത്തെ}|{}:{ആമത്}|{}:{ആമതു്}) <ordinal>
% A number, optionally followed by one ordinal marker.
% NOTE(review): "NUMBERIC" looks like a misspelling of "NUMERIC". The name is
% used again in $SINGULAR_NOUN$, so both places must change together if renamed.
$NUMBERIC$ = $NUMBER$ $ordinal_forms$?
% Agglutination (compound noun stems)
% NOTE(review): $ALL_NOUNS$ is not referenced anywhere else in this file.
$ALL_NOUNS$ = $NSTEM$ | $PROPERNOUN$ | $NUMBER$ | $SANSKRIT$
% Zero or more <adj>-tagged modifiers (noun stem, number or Sanskrit noun)
% followed by exactly one head (noun stem, proper noun or Sanskrit noun).
$AGGLUTINATED_NOUN$ = ( ($NSTEM$ | $NUMBER$ | $SANSKRIT$) <adj> )* ( $NSTEM$ | $PROPERNOUN$ | $SANSKRIT$) % qualifier + qualified noun (modifier-head) sequences
% Two noun stems joined by the <coordinative> tag.
$COORDINATIVE_NOUN$ = $NSTEM$ <coordinative> $NSTEM$ % dvandva (coordinative) compound, e.g. ആനകുതിര (elephant + horse), അച്ഛനമ്മ (father + mother)
$COMPOUND_NSTEM$ = $AGGLUTINATED_NOUN$ | $COORDINATIVE_NOUN$
% Everything that can act as a singular noun before inflection is applied.
$SINGULAR_NOUN$ = $COMPOUND_NSTEM$ | $PRONOUN$ | $ABBREV$ | $BORROWED$ | $DERIVEDNOUNS$ | $NUMBERIC$
% Plural: a singular noun followed by the <pl> tag, composed with $PLURAL$.
$PLURAL_NOUN$ = $SINGULAR_NOUN$ <pl> || $PLURAL$
% One suffix word: any single item from the closed word classes below.
$ONE_SUFFIX$ = $POSTPOSITIONS$ | $CONJUNCTION$ | $POLARITY$ | $QUANTIFIERS$ | $INDECLINABLE$

% A chain of one to three suffix words: two optional slots, then a mandatory one.
$SUFFIXES$ = $ONE_SUFFIX$? $ONE_SUFFIX$? $ONE_SUFFIX$
% Core of a noun word: a singular noun, or a plural noun closed with <EoW><RB>.
$NOUN_CORE$ = $SINGULAR_NOUN$ | ( $PLURAL_NOUN$ <EoW><RB> )

% Complete noun word: optional demonstrative, optional adjective, the core,
% at most one symbol from the #ninfl# class and an optional suffix chain.
$NOUN_WORD$ = $DEM$? $ADJECTIVE$? $NOUN_CORE$ [#ninfl#]? $SUFFIXES$?

% A bare demonstrative is accepted as well. The union is then composed
% with the $NINFL$ transducer.
$NOUN$ = ( $DEM$ | $NOUN_WORD$ ) || $NINFL$
% Derive a standalone adjective form from a noun, but only when the noun
% passes one of the two filters below.
%   Filter 1: the stem ends in anusvara (ം) or virama (്). At least two
%             #Letters# symbols must precede that sign, so that the conjunction
%             ഉം cannot pose as a standalone adjective ഉ.
%   Filter 2: the stem carries the <sanskrit> tag.
% Both filters start with one or more #AAsym# symbols and end with one or more
% symbols from the #POS#, #Numbers#, #infl#, #TMP# and #BM# classes.
% NOTE(review): these symbol classes are not defined in this file; presumably
% they come from symbols.fst - confirm there.
$ADJ_CANDIDATES_FILTER_1$ = [#AAsym#]+ [#Letters#]+ [#Letters#] [ം്] [#POS##Numbers##infl##TMP##BM#]+
$ADJ_CANDIDATES_FILTER_2$ = [#AAsym#]+ [#Letters#]+ <sanskrit> [#POS##Numbers##infl##TMP##BM#]+
$ADJ_CANDIDATES_FILTER$ = $ADJ_CANDIDATES_FILTER_1$ | $ADJ_CANDIDATES_FILTER_2$
% Keep only the part of $NOUN$ that composes with the filter.
$ADJ_CANDIDATES$ = $NOUN$ || $ADJ_CANDIDATES_FILTER$
% Append the <adj> tag and compose with the standalone-adjective rules.
$ADJ_PART$ = ( $ADJ_CANDIDATES$ <adj>) || "<ninfl/standalone-adjective.a>"
% From here on $NOUN$ covers both the plain nouns and their adjective forms.
$NOUN$ = $NOUN$ | $ADJ_PART$
% A small set of tagged sample strings used as a quick check of the grammar.
$test$ = വഴി<n><RB><locative>കൂടി<cnj><RB> | വഴി<n><RB> | വഴി<n><RB><locative> | \
ആശ<sanskrit><RB> | ആശ<sanskrit><RB><adj> | ആശ<sanskrit><RB><adj>ദീപം<n><RB> | \
ആശ<sanskrit><RB><adj>ദീപം<n><RB><adj>
% Compose the samples with $NOUN$ and store the result in noun.test.a.
$test$ || $NOUN$ >> "noun.test.a"
% Disabled debug output of the adjective part:
% $ADJ_PART$ >> "adj.parts.a"
% Store the verb forms tagged <n><deriv> in verb-noun-deriv.a.
$VERB$ <n> <deriv> >> "verb-noun-deriv.a"
% Result expression of this file: the complete noun transducer.
$NOUN$