llama : improve sep token handling (#14272)

This commit is contained in:
Sigbjørn Skjæret
2025-06-20 14:04:09 +02:00
committed by GitHub
parent e28c1b93fd
commit 88fc854b4b
15 changed files with 161 additions and 29 deletions

View File

@@ -228,6 +228,7 @@ void llama_model_saver::add_kv_from_model() {
// add_kv(LLM_KV_TOKENIZER_MASK_ID, ???);
add_kv(LLM_KV_TOKENIZER_ADD_BOS, vocab.get_add_bos());
add_kv(LLM_KV_TOKENIZER_ADD_EOS, vocab.get_add_eos());
add_kv(LLM_KV_TOKENIZER_ADD_SEP, vocab.get_add_sep());
add_kv(LLM_KV_TOKENIZER_ADD_PREFIX, vocab.get_add_space_prefix());
add_kv(LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, vocab.get_remove_extra_whitespaces());
add_kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, vocab.get_precompiled_charsmap());