From e49bb3211503ad4ec48bf38e7c432330b1503c9a Mon Sep 17 00:00:00 2001 From: lparam Date: Sat, 21 Oct 2017 01:01:08 +0800 Subject: [PATCH] import chinese non-breaking prefix from moses --- .../nonbreaking_prefix.zh | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh new file mode 100644 index 0000000..6a090be --- /dev/null +++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.zh @@ -0,0 +1,53 @@ +# +# Mandarin (Chinese) +# +# Anything in this file, followed by a period, +# does NOT indicate an end-of-sentence marker. +# +# English/Euro-language given-name initials (appearing in +# news, periodicals, etc.) +A +Ā +B +C +Č +D +E +Ē +F +G +Ģ +H +I +Ī +J +K +Ķ +L +Ļ +M +N +Ņ +O +P +Q +R +S +Š +T +U +Ū +V +W +X +Y +Z +Ž + +# Numbers only. These should only suppress a break when followed by +# a numeric sequence. +# Add #NUMERIC_ONLY# after the word for this function. This case is +# mostly for the English "No." which can either be a sentence of its +# own, or if followed by a number, a non-breaking prefix. +No #NUMERIC_ONLY# +Nr #NUMERIC_ONLY#