Skip to content

Commit

Permalink
Merge pull request #31 from mistralai/default_to_ignore_tekken
Browse files: browse the repository at this point in the history
The Tekken tokenizer should use the "ignore" special token policy by default
  • Loading branch information
patrickvonplaten authored Jul 20, 2024
2 parents 75612d4 + b5566da commit 5933713
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "mistral_common"
version = "1.3.1"
version = "1.3.2"
description = ""
authors = ["bam4d <[email protected]>"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion src/mistral_common/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
__version__ = "1.3.1"
__version__ = "1.3.2"
12 changes: 11 additions & 1 deletion src/mistral_common/tokens/tokenizers/tekken.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -200,7 +200,17 @@ def _decode_all(self, tokens: List[int], special_token_policy: SpecialTokenPolic
for is_special, group in groupby(tokens, lambda t: t < self.num_special_tokens):
if is_special:
if special_token_policy == SpecialTokenPolicy.RAISE:
raise ValueError(f"Special tokens not allowed in this context: {list(group)}")
raise ValueError(
f"Decoding `tokens` that contain special tokens ({list(group)}) is not allowed. \n"
"Either make sure `tokens` do not include any special tokens or, "
"if you want to decode `tokens` that includes special tokens, "
"change the tokenizer's special token policy to IGNORE or KEEP: \n"
"```\nfrom mistral_common.tokens.tokenizers.mistral import MistralTokenizer"
"\nfrom mistral_common.tokens.tokenizers.tekken import SpecialTokenPolicy"
"\n\ntokenizer = MistralTokenizer.v3(is_tekken=True)"
"\ntokenizer.special_token_policy = SpecialTokenPolicy.IGNORE # or SpecialTokenPolicy.KEEP"
"\n```"
)
elif special_token_policy == SpecialTokenPolicy.KEEP:
decoded.extend(self._all_special_tokens[t] for t in group)
elif special_token_policy == SpecialTokenPolicy.IGNORE:
Expand Down

0 comments on commit 5933713

Please sign in to comment.