mirror of https://github.com/ggml-org/llama.cpp.git
gpt-oss: implement harmony parsing (#15181)
* model : add harmony parser for gpt-oss
* gpt-oss : fix grammar trigger from causing empty stack
* gpt-oss: tweak the grammar trigger again
* gpt-oss : add support for recipient in role header
* gpt-oss : fix ungrouped tool calls in grammar
* gpt-oss : loosen function name matching during parse
* gpt-oss : clean up workarounds
* gpt-oss : add template tests
* gpt-oss : simulate thinking and tool call tags
* gpt-oss : undo think tags when reasoning_format is none
* gpt-oss : set special tokens back to user defined
* gpt-oss : update openai-gpt-oss template
* server : filter out harmony thought messages
* gpt-oss : simplify parsing
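For context, "harmony" is the chat markup that gpt-oss emits, assembled from the special tokens this commit touches (<|start|>, <|channel|>, <|message|>, <|constrain|>, <|end|>, <|call|>, <|return|>). Below is a rough, hypothetical sketch of the kind of turn the new parser has to split into reasoning, tool calls, and final content; the channel names and exact layout are illustrative assumptions, not taken from this commit.

// Hypothetical illustration only -- the real stream comes from the model.
static const char * example_harmony_turn =
    "<|channel|>analysis<|message|>think about the request here<|end|>"
    "<|start|>assistant to=functions.get_weather<|channel|>commentary"
    " <|constrain|>json<|message|>{\"location\":\"Paris\"}<|call|>";

In this sketch, the to=... part of the role header is the recipient mentioned in the commit message, and <|constrain|> (now also forced to render; see the first hunk below) marks the tool-call payload format, here json.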
@@ -2341,7 +2341,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

         // @ngxson : quick hack for gpt-oss, always render these tokens
         for (const auto & t : token_to_id) {
-            if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>") {
+            if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
                 id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
             }
         }
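The first hunk adds <|constrain|> to the gpt-oss markup tokens whose attribute is forced to LLAMA_TOKEN_ATTR_USER_DEFINED so that they keep rendering as text. A minimal, self-contained sketch of the same membership test, written against a lookup set instead of chained comparisons (gpt_oss_is_rendered_token is a hypothetical name, not part of the patch):

#include <string>
#include <unordered_set>

// Hypothetical helper mirroring the check above: true for the gpt-oss markup
// tokens that the loader forces to render as plain text.
static bool gpt_oss_is_rendered_token(const std::string & text) {
    static const std::unordered_set<std::string> rendered = {
        "<|channel|>", "<|message|>", "<|start|>", "<|constrain|>",
    };
    return rendered.count(text) > 0;
}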
@@ -2388,6 +2388,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

         if (has_return && has_call && has_end) {
             special_eog_ids.erase(end_id);
+            id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
             LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
         }
     }
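The second hunk extends the existing EOG adjustment: when the vocab has all of <|return|>, <|call|> and <|end|>, <|end|> is dropped from the end-of-generation set, and the newly added line also marks it LLAMA_TOKEN_ATTR_USER_DEFINED, presumably so it still renders and the harmony parser can see message boundaries inside a turn. A simplified, self-contained sketch of that logic with made-up types (the sketch_* names are not from llama.cpp):

#include <cstdio>
#include <set>
#include <vector>

// Made-up stand-ins for the vocab structures touched above.
enum sketch_attr { SKETCH_ATTR_CONTROL, SKETCH_ATTR_USER_DEFINED };
struct sketch_token_data { sketch_attr attr = SKETCH_ATTR_CONTROL; };

// If dedicated <|return|>/<|call|> stop tokens exist, <|end|> stops acting as an
// end-of-generation token but is kept renderable as a plain-text delimiter.
static void sketch_prune_end_token(std::set<int> & special_eog_ids,
                                   std::vector<sketch_token_data> & id_to_token,
                                   int end_id, bool has_return, bool has_call, bool has_end) {
    if (has_return && has_call && has_end) {
        special_eog_ids.erase(end_id);
        id_to_token[end_id].attr = SKETCH_ATTR_USER_DEFINED;
        std::fprintf(stderr, "removing '<|end|>' (id %d) from the EOG list\n", end_id);
    }
}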