Skip to content

Commit

Permalink
Updated reading and visual normalization tests for Perso-Arabic.
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 672573963
  • Loading branch information
agutkin authored and copybara-github committed Sep 9, 2024
1 parent 9fce149 commit c18911c
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 26 deletions.
20 changes: 0 additions & 20 deletions nisaba/scripts/abjad_alphabet/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -154,23 +154,13 @@ nisaba_compile_script_lang_multi_grm_py(
nisaba_grm_textproto_test(
name = "visual_norm_test",
grammar = ":visual_norm",
# TODO: Fix these tests and re-enable in TAP.
tags = [
"manual",
"notap",
],
textproto = "testdata/visual_norm.textproto",
token_type = "byte",
)

nisaba_grm_textproto_test(
name = "visual_norm_utf8_test",
grammar = ":visual_norm_utf8",
# TODO: Fix these tests and re-enable in TAP.
tags = [
"manual",
"notap",
],
textproto = "testdata/visual_norm.textproto",
token_type = "utf8",
)
Expand All @@ -194,23 +184,13 @@ nisaba_compile_script_lang_multi_grm_py(
nisaba_grm_textproto_test(
name = "reading_norm_test",
grammar = ":reading_norm",
# TODO: Fix these tests and re-enable in TAP.
tags = [
"manual",
"notap",
],
textproto = "testdata/reading_norm.textproto",
token_type = "byte",
)

nisaba_grm_textproto_test(
name = "reading_norm_utf8_test",
grammar = ":reading_norm_utf8",
# TODO: Fix these tests and re-enable in TAP.
tags = [
"manual",
"notap",
],
textproto = "testdata/reading_norm.textproto",
token_type = "utf8",
)
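Expand Down
2 changes: 1 addition & 1 deletion (file header missing from this capture; counts derived from the commit totals; presumably nisaba/scripts/abjad_alphabet/testdata/reading_norm.textproto)
Original file line number Diff line number Diff line change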
Expand Up @@ -141,7 +141,7 @@ rewrite { rule: "CKB" input: "ڪەڵک" output: "کەڵک" }
# ALEF
rewrite { rule: "CKB" input: "خلجآن" output: "خلجان" }
rewrite { rule: "CKB" input: "پکآپ" output: "پکاپ" }
rewrite { rule: "CKB" input: "بةبآ" output: "بەبا" output: "بەبا" }
rewrite { rule: "CKB" input: "بةبآ" output: "بەبا" }
rewrite { rule: "CKB" input: "إِلَی" output: "اِلَی" }
rewrite { rule: "CKB" input: "إیراد" output: "ایراد" }
rewrite { rule: "CKB" input: "لنشأة" output: "لنشاە" }
Expand Down
10 changes: 5 additions & 5 deletions nisaba/scripts/abjad_alphabet/testdata/visual_norm.textproto
Original file line number Diff line number Diff line change
Expand Up @@ -610,19 +610,19 @@ rewrite { rule: "PA" input: "تبں۬اؤں۬ا" output: "تبناؤنا" }
rewrite { rule: "PA" input: "ادھنیٔم" output: "ادھنئم" }

# KAF → KEHEH
rewrite { rule: "PA" input: "كاں" output: "کاں" output: "کاں" }
rewrite { rule: "PA" input: "كاں" output: "کاں" }

# YEH → FARSI YEH
rewrite { rule: "PA" input: "خريد" output: "خرید" output: "خرید" }
rewrite { rule: "PA" input: "خريد" output: "خرید" }

# RNOON → TTEH
rewrite { rule: "PA" input: "ڻَنگیا" output: "ٹَنگیا" output: "ٹَنگیا" }
rewrite { rule: "PA" input: "ڻَنگیا" output: "ٹَنگیا" }

# TEH MARBUTA → TEH MARBUTA GOAL
rewrite { rule: "PA" input: "ة" output: "ۃ" output: "ۃ" }
rewrite { rule: "PA" input: "ة" output: "ۃ" }

# HEH GOAL WITH HAMZA ABOVE → HEH, HAMZA ABOVE
rewrite { rule: "PA" input: "ۂ" output: "هٔ" output: "هٔ" }
rewrite { rule: "PA" input: "ۂ" output: "هٔ" }

# Normalizations for isolated characters

Expand Down

0 comments on commit c18911c

Please sign in to comment.