This commit is contained in:
younesbelkada
2025-07-04 15:50:43 +04:00
parent a6d0067dd7
commit 1fd0574adc

View File

@@ -607,7 +607,10 @@ class TextModel(ModelBase):
 from transformers import AutoTokenizer
 tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
-vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
+vocab_size = max(
+    self.hparams.get("vocab_size", len(tokenizer.vocab)),
+    len(tokenizer.vocab)
+)
 assert max(tokenizer.vocab.values()) < vocab_size
 tokpre = self.get_vocab_base_pre(tokenizer)