server: add --reasoning-budget 0 to disable thinking (incl. qwen3 w/ enable_thinking:false) (#13771)

---------

Co-authored-by: ochafik <ochafik@google.com>
Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
2025-10-28 08:31:25 +00:00 · 2025-05-26 00:30:51 +01:00
parent 2f099b510f
commit e121edc432
12 changed files with 277 additions and 107 deletions
--- a/tools/server/utils.hpp
+++ b/tools/server/utils.hpp
@@ -568,6 +568,7 @@ struct oaicompat_parser_options {
    common_chat_templates * tmpls;
    bool allow_image;
    bool allow_audio;
+    bool enable_thinking = true;
 };

 // used by /chat/completions endpoint
@@ -733,6 +734,7 @@ static json oaicompat_chat_params_parse(
    inputs.parallel_tool_calls   = json_value(body, "parallel_tool_calls", false);
    inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true);
    inputs.reasoning_format      = opt.reasoning_format;
+    inputs.enable_thinking       = opt.enable_thinking;
    if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && body.contains("grammar")) {
        throw std::runtime_error("Cannot use custom grammar constraints with tools.");
    }