Skip to content

Commit

Permalink
fix: test_train_bpe_w_special_tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
Hk669 committed Jun 5, 2024
1 parent d8e2eac commit 3bfaf5c
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions tests/test_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ def test_train_bpe_w_special_tokens():
texts = "<|startoftext|> Hello, World! This is a sample text with the special tokens [SPECIAL1] and [SPECIAL2] to test the tokenizer.<|endoftext|>"
tokenizer.train(texts, vocab_size=310, verbose=False)

- assert len(tokenizer.vocab) == 281
- assert len(tokenizer.merges) == 25
+ assert len(tokenizer.vocab) == 310
+ assert len(tokenizer.merges) == 54
assert tokenizer.decode(tokenizer.encode(texts)) == texts
assert tokenizer.inverse_special_tokens == {v: k for k,v in special_tokens.items()}
assert tokenizer.special_tokens == special_tokens
Expand Down

0 comments on commit 3bfaf5c

Please sign in to comment.