diff --git a/nemo/collections/llm/gpt/model/base.py b/nemo/collections/llm/gpt/model/base.py
index 5b35f5da0ba5..0d2dbb08aff0 100644
--- a/nemo/collections/llm/gpt/model/base.py
+++ b/nemo/collections/llm/gpt/model/base.py
@@ -206,10 +206,11 @@ def configure_model(self, tokenizer, pre_process=None, post_process=None) -> "MC
         if hasattr(self, 'vocab_size'):
             vocab_size = self.vocab_size
-            logging.info(
-                f"Use preset vocab_size: {vocab_size}, original vocab_size: {tokenizer.vocab_size}, dummy tokens:"
-                f" {vocab_size - tokenizer.vocab_size}."
-            )
+            if tokenizer is not None:
+                logging.info(
+                    f"Use preset vocab_size: {vocab_size}, original vocab_size: {tokenizer.vocab_size}, dummy tokens:"
+                    f" {vocab_size - tokenizer.vocab_size}."
+                )
         else:
             vocab_size = get_vocab_size(self, tokenizer.vocab_size, self.make_vocab_size_divisible_by)
@@ -405,11 +406,21 @@ def get_inference_wrapper(self, params_dtype, inference_batch_times_seqlen_thres
         if mcore_model is None or type(mcore_model) is not MCoreGPTModel:
             raise ValueError("Exact McoreGPTModel instance not found in the model structure.")
 
+        vocab_size = None
+        if self.tokenizer is not None:
+            vocab_size = self.tokenizer.vocab_size
+        elif hasattr(self.config, 'vocab_size'):
+            vocab_size = self.config.vocab_size
+        else:
+            raise ValueError(
+                'Unable to find vocab size. Either pass in a tokenizer with vocab size, or set vocab size in the model config'
+            )
+
         inference_wrapper_config = InferenceWrapperConfig(
             hidden_size=mcore_model.config.hidden_size,
             params_dtype=params_dtype,
             inference_batch_times_seqlen_threshold=inference_batch_times_seqlen_threshold,
-            padded_vocab_size=self.tokenizer.vocab_size,
+            padded_vocab_size=vocab_size,
         )
 
         model_inference_wrapper = GPTInferenceWrapper(mcore_model, inference_wrapper_config)
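
For reference, a minimal standalone sketch of the vocab-size fallback order that the second hunk adds to get_inference_wrapper. The helper name resolve_vocab_size and the toy tokenizer/config classes below are hypothetical, used only to illustrate the resolution order (tokenizer first, then config, otherwise raise); they are not part of the patch.

from dataclasses import dataclass


@dataclass
class _ToyTokenizer:
    vocab_size: int


@dataclass
class _ToyConfig:
    vocab_size: int


def resolve_vocab_size(tokenizer, config):
    # Same fallback order as the diff: prefer the tokenizer's vocab size,
    # then a vocab_size attribute on the config, otherwise fail loudly.
    if tokenizer is not None:
        return tokenizer.vocab_size
    if hasattr(config, 'vocab_size'):
        return config.vocab_size
    raise ValueError(
        'Unable to find vocab size. Either pass in a tokenizer with vocab size, '
        'or set vocab size in the model config'
    )


# Tokenizer wins when present; the config is only consulted as a fallback.
assert resolve_vocab_size(_ToyTokenizer(vocab_size=32000), _ToyConfig(vocab_size=131072)) == 32000
assert resolve_vocab_size(None, _ToyConfig(vocab_size=131072)) == 131072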