mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
convert : automatically fall back to HfVocab if tokenizer.model doesn't exist (#5821)
This commit is contained in:
@@ -373,7 +373,7 @@ def handle_metadata(cfg, hp):
|
||||
raise ValueError('Unable to load metadata')
|
||||
vocab_path = Path(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir)
|
||||
vocab_factory = convert.VocabFactory(vocab_path)
|
||||
vocab, special_vocab = vocab_factory.load_vocab(cfg.vocabtype, cfg.model_metadata_dir)
|
||||
vocab, special_vocab = vocab_factory.load_vocab(cfg.vocabtype.split(","), cfg.model_metadata_dir)
|
||||
convert.check_vocab_size(params, vocab)
|
||||
return params, vocab, special_vocab
|
||||
|
||||
@@ -398,8 +398,8 @@ def handle_args():
|
||||
help ='Load HuggingFace/.pth vocab and metadata from the specified directory')
|
||||
parser.add_argument("--vocab-dir", type=Path,
|
||||
help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
|
||||
parser.add_argument("--vocabtype", choices=["spm", "bpe"], default="spm",
|
||||
help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)")
|
||||
parser.add_argument("--vocabtype", default="spm,hfft",
|
||||
help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm,hfft)")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user