gpt-oss: implement harmony parsing (#15181)

* model : add harmony parser for gpt-oss * gpt-oss : fix grammar trigger from causing empty stack * gpt-oss: tweak the grammar trigger again * gpt-oss : add support for recipient in role header * gpt-oss : fix ungrouped tool calls in grammar * gpt-oss : loosen function name matching during parse * gpt-oss : clean up workarounds * gpt-oss : add template tests * gpt-oss : simulate thinking and tool call tags * gpt-oss : undo think tags when reasoning_format is none * gpt-oss : set special tokens back to user defined * gpt-oss : update openai-gpt-oss template * server : filter out harmony thought messages * gpt-oss : simplify parsing
2025-10-27 08:21:30 +00:00 · 2025-08-14 09:23:11 -05:00
parent 646944cfa8
commit b204a5a234
7 changed files with 672 additions and 12 deletions
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2341,7 +2341,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

        // @ngxson : quick hack for gpt-oss, always render these tokens
        for (const auto & t : token_to_id) {
-            if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>") {
+            if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
                id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
            }
        }
@@ -2388,6 +2388,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

            if (has_return && has_call && has_end) {
                special_eog_ids.erase(end_id);
+                id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
                LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
            }
        }