From 105d9745b18a044c193254176793535316d59b2c Mon Sep 17 00:00:00 2001
From: igeni
Date: Thu, 21 Mar 2024 22:05:04 +0300
Subject: [PATCH] improvement of regexp (including support of unicode)

---
 .../sequence_parallel/data/tokenizer/bert_tokenization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/tutorial/sequence_parallel/data/tokenizer/bert_tokenization.py b/examples/tutorial/sequence_parallel/data/tokenizer/bert_tokenization.py
index 91f76110e85b..8222b8c499d2 100644
--- a/examples/tutorial/sequence_parallel/data/tokenizer/bert_tokenization.py
+++ b/examples/tutorial/sequence_parallel/data/tokenizer/bert_tokenization.py
@@ -35,7 +35,7 @@ def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
     if not init_checkpoint:
         return
 
-    m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
+    m = re.match("^.*?([\w-]+)/bert_model.ckpt", init_checkpoint)
     if m is None:
         return
 