diff --git a/nisaba/scripts/brahmic/iso.py b/nisaba/scripts/brahmic/iso.py index 02e675fb..b723039f 100644 --- a/nisaba/scripts/brahmic/iso.py +++ b/nisaba/scripts/brahmic/iso.py @@ -56,16 +56,16 @@ def brahmic_to_iso( - consonant_file: os.PathLike, - inherent_vowel_file: os.PathLike, - vowel_sign_file: os.PathLike, - vowel_file: os.PathLike, - vowel_length_sign_file: os.PathLike, - coda_file: os.PathLike, - dead_consonant_file: os.PathLike, - standalone_file: os.PathLike, - subjoined_consonant_file: os.PathLike, - virama_file: os.PathLike, + consonant_file: os.PathLike[str], + inherent_vowel_file: os.PathLike[str], + vowel_sign_file: os.PathLike[str], + vowel_file: os.PathLike[str], + vowel_length_sign_file: os.PathLike[str], + coda_file: os.PathLike[str], + dead_consonant_file: os.PathLike[str], + standalone_file: os.PathLike[str], + subjoined_consonant_file: os.PathLike[str], + virama_file: os.PathLike[str], ) -> p.Fst: """Creates an FST that transduces a Brahmic script to ISO 15919. diff --git a/nisaba/scripts/brahmic/util.py b/nisaba/scripts/brahmic/util.py index 38f0eddd..f7579b1d 100644 --- a/nisaba/scripts/brahmic/util.py +++ b/nisaba/scripts/brahmic/util.py @@ -58,7 +58,7 @@ def OpenSigma(script: str, token_type: str) -> pynini.Fst: def MaybeLoadScriptConfig( - file_path: os.PathLike) -> script_config_pb2.ScriptConfig: + file_path: os.PathLike[str]) -> script_config_pb2.ScriptConfig: """Loads script configuration, if present.""" pb = script_config_pb2.ScriptConfig() if not uf.IsFileExist(file_path): diff --git a/nisaba/scripts/brahmic/visual_norm.py b/nisaba/scripts/brahmic/visual_norm.py index b515711e..0a1fea4a 100644 --- a/nisaba/scripts/brahmic/visual_norm.py +++ b/nisaba/scripts/brahmic/visual_norm.py @@ -46,9 +46,9 @@ 'Token type: utf8 or byte') -def core_visual_norm_fsts(rewrite_file: os.PathLike, - preserve_file: os.PathLike, - consonant_file: os.PathLike, +def core_visual_norm_fsts(rewrite_file: os.PathLike[str], + preserve_file: os.PathLike[str], + consonant_file: os.PathLike[str], sigma: pynini.Fst) -> List[pynini.Fst]: """Creates a visual normalization FST. diff --git a/nisaba/scripts/brahmic/wellformed.py b/nisaba/scripts/brahmic/wellformed.py index e58af465..ba104382 100644 --- a/nisaba/scripts/brahmic/wellformed.py +++ b/nisaba/scripts/brahmic/wellformed.py @@ -35,24 +35,24 @@ import nisaba.scripts.utils.file as uf -def _input_string_file(filename: os.PathLike, +def _input_string_file(filename: os.PathLike[str], return_if_empty: pynini.Fst = uf.EMPTY) -> pynini.Fst: fst = uf.StringFile(filename, return_if_empty) return pynini.project(fst, 'input').rmepsilon() -def accept_well_formed(script_config_file: os.PathLike, - consonant_file: os.PathLike, - dead_consonant_file: os.PathLike, - subjoined_consonant_file: os.PathLike, - vowel_sign_file: os.PathLike, - vowel_file: os.PathLike, - vowel_length_sign_file: os.PathLike, - coda_file: os.PathLike, - standalone_file: os.PathLike, - virama_file: os.PathLike, - accept_file: os.PathLike, - preserve_file: os.PathLike) -> pynini.Fst: +def accept_well_formed(script_config_file: os.PathLike[str], + consonant_file: os.PathLike[str], + dead_consonant_file: os.PathLike[str], + subjoined_consonant_file: os.PathLike[str], + vowel_sign_file: os.PathLike[str], + vowel_file: os.PathLike[str], + vowel_length_sign_file: os.PathLike[str], + coda_file: os.PathLike[str], + standalone_file: os.PathLike[str], + virama_file: os.PathLike[str], + accept_file: os.PathLike[str], + preserve_file: os.PathLike[str]) -> pynini.Fst: """Create an unweighted FSA to accept the well-formed strings in a script. Args: diff --git a/nisaba/scripts/utils/char.py b/nisaba/scripts/utils/char.py index 14978b8f..b22064bf 100644 --- a/nisaba/scripts/utils/char.py +++ b/nisaba/scripts/utils/char.py @@ -27,8 +27,9 @@ ZWS = "\u200B" # Zero Width Space -def _read_string_file_chars_to_set(files: Iterable[os.PathLike], - relevant_fields: int) -> Set[str]: +def _read_string_file_chars_to_set( + files: Iterable[os.PathLike[str]], relevant_fields: int +) -> Set[str]: """Reads the characters under some selection from some file paths into a set. Arguments: @@ -51,8 +52,10 @@ def _read_string_file_chars_to_set(files: Iterable[os.PathLike], return chars -def derive_chars(both_sides: Iterable[os.PathLike] = (), - input_side: Iterable[os.PathLike] = ()) -> Set[str]: +def derive_chars( + both_sides: Iterable[os.PathLike[str]] = (), + input_side: Iterable[os.PathLike[str]] = () +) -> Set[str]: """Create the set of characters in a script from StringFiles. Args: diff --git a/nisaba/scripts/utils/file.py b/nisaba/scripts/utils/file.py index 6c437fe1..c95c5a6c 100644 --- a/nisaba/scripts/utils/file.py +++ b/nisaba/scripts/utils/file.py @@ -26,12 +26,12 @@ EPSILON: pynini.Fst = pynini.accep("").optimize() -def AsResourcePath(filename: os.PathLike) -> os.PathLike: +def AsResourcePath(filename: os.PathLike[str]) -> os.PathLike[str]: filename = os.fspath(filename) return pathlib.Path(runfiles.Create().Rlocation(filename)) -def IsFileExist(filename: os.PathLike) -> bool: +def IsFileExist(filename: os.PathLike[str]) -> bool: """Checks if a resource file exists.""" try: if os.path.isfile(AsResourcePath(filename)): @@ -47,7 +47,7 @@ def OnEmpty(fst, return_if_empty=EMPTY): return return_if_empty if fst.start() == pynini.NO_STATE_ID else fst -def StringFile(filename: os.PathLike, +def StringFile(filename: os.PathLike[str], return_if_empty: pynini.Fst = EMPTY) -> pynini.Fst: """Reads FST from `filename`. If FST is empty returns `return_if_empty`.""" return OnEmpty(pynini.string_file(AsResourcePath(filename)), return_if_empty) diff --git a/nisaba/scripts/utils/letter_languages.py b/nisaba/scripts/utils/letter_languages.py index 5cc187d5..26d79744 100644 --- a/nisaba/scripts/utils/letter_languages.py +++ b/nisaba/scripts/utils/letter_languages.py @@ -41,7 +41,9 @@ def _fill_missing_raw(pb: ll.LetterLanguages) -> None: item.letter.raw, _ = us.convert_item(pb.uname_prefix, [], item.letter) -def read_textproto(proto_path: Union[str, os.PathLike]) -> ll.LetterLanguages: +def read_textproto( + proto_path: Union[str, os.PathLike[str]] +) -> ll.LetterLanguages: pb = proto.read_textproto(proto_path, ll.LetterLanguages()) _fill_missing_raw(pb) logging.info('Read %d letters.', len(pb.item)) diff --git a/nisaba/scripts/utils/proto.py b/nisaba/scripts/utils/proto.py index cc047835..1745d655 100644 --- a/nisaba/scripts/utils/proto.py +++ b/nisaba/scripts/utils/proto.py @@ -29,7 +29,7 @@ def read_textproto( - proto_path: Union[str, os.PathLike], proto: _ParsableT + proto_path: Union[str, os.PathLike[str]], proto: _ParsableT ) -> _ParsableT: logging.info('Parsing %s ...', proto_path) if not os.path.exists(proto_path): diff --git a/nisaba/scripts/utils/rule.py b/nisaba/scripts/utils/rule.py index 2b9af697..0bd87ddd 100644 --- a/nisaba/scripts/utils/rule.py +++ b/nisaba/scripts/utils/rule.py @@ -76,12 +76,12 @@ RuleSets = List[RuleSet] -def rules_from_string_file(file: os.PathLike) -> Iterator[Rule]: +def rules_from_string_file(file: os.PathLike[str]) -> Iterator[Rule]: """Yields string rules from a text resource with unweighted string maps.""" return rules_from_string_path(uf.AsResourcePath(file)) -def rules_from_string_path(file: os.PathLike) -> Iterator[Rule]: +def rules_from_string_path(file: os.PathLike[str]) -> Iterator[Rule]: """Yields string rules from a text file with unweighted string maps.""" with pathlib.Path(file).open('rt') as f: df = pd.read_csv(f, sep='\t', comment='#', escapechar='\\', @@ -143,7 +143,9 @@ def fst_from_rules(rules: RuleSet, sigma: pynini.Fst) -> pynini.Fst: return ur.RewriteAndComposeFsts(fsts, sigma) -def fst_from_rule_file(rule_file: os.PathLike, sigma: pynini.Fst) -> pynini.Fst: +def fst_from_rule_file( + rule_file: os.PathLike[str], sigma: pynini.Fst +) -> pynini.Fst: """Gets rewrite FST from a given rewrite rule file. Args: @@ -172,7 +174,7 @@ def _fst_from_cascading_rules(rules: RuleSet, sigma: pynini.Fst) -> pynini.Fst: return ur.RewriteAndComposeFsts(fsts, sigma) -def fst_from_cascading_rule_file(rule_file: os.PathLike, +def fst_from_cascading_rule_file(rule_file: os.PathLike[str], sigma: pynini.Fst) -> pynini.Fst: """Gets rewrite FST from a given rewrite rule file. diff --git a/nisaba/scripts/utils/unicode_strings_util.py b/nisaba/scripts/utils/unicode_strings_util.py index a06faa8b..a242810f 100644 --- a/nisaba/scripts/utils/unicode_strings_util.py +++ b/nisaba/scripts/utils/unicode_strings_util.py @@ -168,7 +168,7 @@ def _fill_missing_raw(pb: unicode_strings_pb2.UnicodeStrings) -> None: def read_textproto( - proto_path: Union[str, os.PathLike], + proto_path: Union[str, os.PathLike[str]], ) -> unicode_strings_pb2.UnicodeStrings: pb = proto.read_textproto(proto_path, unicode_strings_pb2.UnicodeStrings()) _fill_missing_raw(pb)