Skip to content

Commit

Permalink
Add uk-ua symbols configuration to include "`" in normalized utterances
Browse files (browse the repository at this point in the history)
  • Loading branch information
NeonDaniel committed Jun 18, 2024
1 parent 96b0203 commit d76e6a6
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
3 changes: 2 additions & 1 deletion lingua_franca/lang/parse_uk.py
Original file line number Diff line number Diff line change
Expand Up @@ -1642,7 +1642,8 @@ def extract_numbers_uk(text, short_scale=True, ordinals=False):


 class UkrainianNormalizer(Normalizer):
-    with open(resolve_resource_file("text/uk-ua/normalize.json"), encoding='utf8') as f:
+    with open(resolve_resource_file("text/uk-ua/normalize.json"),
+              encoding='utf8') as f:
         _default_config = json.load(f)


Expand Down
2 changes: 2 additions & 0 deletions lingua_franca/res/text/uk-ua/normalize.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
"тисяча": "1000",
"тисяч": "1000"
},
+"symbols": [".", ",", ";", "_", "!", "?", "<", ">", "|", "(", ")", "=", "[",
+"]", "{", "}", "»", "«", "*", "~", "^", "\""],
"stopwords": [],
"articles": []
}

0 comments on commit d76e6a6

Please sign in to comment.