Commit

fix tests
pavel-esir committed Oct 3, 2024
1 parent 5314bb1 · commit c9cfb80
Showing 2 changed files with 5 additions and 3 deletions.
python/openvino_tokenizers/convert_tokenizer.py (4 changes: 2 additions & 2 deletions)
@@ -99,13 +99,13 @@ def convert_tokenizer(
     if isinstance(tokenizer_object, PreTrainedTokenizerBase):
         if can_use_sentencepiece and (is_unigram or not tokenizer_object.is_fast or use_sentencepiece_backend):
             logger.info("Convert tokenizer using SentencePiece .model file.")
-            ov_tokenizers = convert_sentencepiece_model_tokenizer(tokenizer_object, params, add_attention_mask=True)
+            ov_tokenizers = convert_sentencepiece_model_tokenizer(tokenizer_object, params)
         elif is_tiktoken_model(tokenizer_object):
             logger.info("Convert tiktoken-based tokenizer")
             ov_tokenizers = convert_tiktoken_model_tokenizer(tokenizer_object, params)
         elif isinstance(tokenizer_object, PreTrainedTokenizerFast):
             logger.info("Convert Huggingface Fast tokenizer pipeline.")
-            ov_tokenizers = convert_fast_tokenizer(tokenizer_object, params, number_of_inputs=1)
+            ov_tokenizers = convert_fast_tokenizer(tokenizer_object, params)
         else:
             raise OVTypeError(f"Huggingface tokenizer type is not supported: {type(tokenizer_object)}")

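For context (not part of the commit): the keyword arguments removed above carry the same values as the defaults declared on the new TokenzierConversionParams fields in utils.py below, so callers that relied on the old hard-coded defaults keep the same behaviour. A minimal sketch, assuming the dataclass is importable from openvino_tokenizers.utils and constructible with no arguments:

from openvino_tokenizers.utils import TokenzierConversionParams  # assumed import path

params = TokenzierConversionParams()
assert params.add_attention_mask is True   # replaces the removed add_attention_mask=True
assert params.number_of_inputs == 1        # replaces the removed number_of_inputs=1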
python/openvino_tokenizers/utils.py (4 changes: 3 additions & 1 deletion)
@@ -71,7 +71,9 @@ class TokenzierConversionParams:
     handle_special_tokens_with_re: Optional[bool] = None
     use_sentencepiece_backend: bool = False
     utf8_replace_mode: Optional[UTF8ReplaceMode] = None
 
+    add_attention_mask: bool = True
+    add_prefix_space: Optional[bool] = None
+    number_of_inputs: int = 1
 
 logger = logging.getLogger(__name__)


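A hedged usage sketch of how a caller might set the new fields explicitly after this change; the model name, the params keyword on convert_tokenizer, and the with_detokenizer return shape are illustrative assumptions, not confirmed by this commit:

# Hypothetical caller-side example; requires transformers and openvino_tokenizers installed.
from transformers import AutoTokenizer
from openvino_tokenizers import convert_tokenizer
from openvino_tokenizers.utils import TokenzierConversionParams

hf_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # any Hugging Face tokenizer
params = TokenzierConversionParams(add_attention_mask=True, number_of_inputs=1)
# Assumed: convert_tokenizer consumes a TokenzierConversionParams instance via `params`.
ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True, params=params)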