Skip to content

Commit

Permalink
Up
Browse files (browse the repository at this point in the history)
  • Loading branch information
patrickvonplaten committed Nov 19, 2024
1 parent d9e120f commit cffadd2
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/mistral_common/tokens/tokenizers/mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ def from_model(cls, model: str, strict: bool = False) -> "MistralTokenizer":

if not strict:
warnings.warn(
"Calling `MistralTokenizer.from_model(..., strict=False)` is deprecated as it can lead to incorrect tokenizers."
"It is strongly recommended to use MistralTokenizer.from_model(..., strict=True)` "
"Calling `MistralTokenizer.from_model(..., strict=False)` is deprecated as it can lead to incorrect "
"tokenizers. It is strongly recommended to use MistralTokenizer.from_model(..., strict=True)` "
"which will become the default in `mistral_common=1.6.0`."
"If you are using `mistral_common` for open-sourced model weights, we recommend using "
"`MistralTokenizer.from_file('<path/to/tokenizer/file>')` instead.",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_tokenizer_v7.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ def test_truncation_failed(tekkenizer: InstructTokenizerV7, messages: List[ChatM
tekkenizer.encode_instruct(InstructRequest(messages=messages, truncate_at_max_tokens=9))


def test_from_model():
def test_from_model() -> None:
tokenizer = MistralTokenizer.from_model("ministral-8b-2410", strict=True)
assert tokenizer.instruct_tokenizer.tokenizer.version == TokenizerVersion.v3
assert tokenizer.instruct_tokenizer.mm_encoder is None
Expand Down

0 comments on commit cffadd2

Please sign in to comment.