vocab : JetBrains Mellum pre-tokenizer (#15045)

This commit is contained in:
Csaba Kecskemeti
2025-08-03 12:38:18 -07:00
committed by GitHub
parent 83bc2f288c
commit 97366dc6ab
3 changed files with 6 additions and 1 deletion

View File

@@ -1856,7 +1856,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
tokenizer_pre == "gigachat" ||
tokenizer_pre == "jina-v2-es" ||
tokenizer_pre == "jina-v2-de" ||
-tokenizer_pre == "a.x-4.0") {
+tokenizer_pre == "a.x-4.0" ||
+tokenizer_pre == "mellum") {
pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
} else if (
tokenizer_pre == "jina-v1-en" ||