From 0d96944f4a63b9dc4b2cf7244ff687daa7961517 Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 22 Aug 2025 13:31:45 +0000 Subject: [PATCH] Add DeepSeek V3.1 thinking mode support - Added COMMON_CHAT_FORMAT_DEEPSEEK_V3_1 enum value - Created common_chat_params_init_deepseek_v3_1() function (currently uses R1 implementation) - Created common_chat_parse_deepseek_v3_1() function that handles V3.1 thinking format: - Extracts reasoning content before '</think>' tag into reasoning_content - Extracts regular content after '</think>' tag into content - No opening '<think>' tag in V3.1 format - Added detection logic for V3.1 templates based on pattern: 'message['prefix'] is defined and message['prefix'] and thinking' - Added V3.1 case to parsing switch statement This addresses the issue where V3.1 outputs reasoning content followed by '</think>' and then regular content without the opening '<think>' tag. --- common/chat.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ common/chat.h | 1 + 2 files changed, 42 insertions(+) diff --git a/common/chat.cpp b/common/chat.cpp index 7f6809a4ed..34e6152164 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1312,6 +1312,12 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_ } return data; } + +static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) { + // For now, use the same implementation as R1 + return common_chat_params_init_deepseek_r1(tmpl, inputs); +} + static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { builder.try_parse_reasoning("", ""); if (!builder.syntax().parse_tool_calls) { @@ -1333,6 +1339,32 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { tool_calls_end); } +static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) { + // DeepSeek V3.1 outputs reasoning content followed by "" and then regular content + // There's no opening "" tag, so we need to handle this 
differently + + // First, try to find the "" tag that separates thinking from regular content + static const common_regex thinking_end_regex(""); + if (auto res = builder.try_find_regex(thinking_end_regex)) { + // Extract everything before "" as reasoning content + auto reasoning_content = builder.str(common_string_range{0, res->groups[0].begin}); + auto stripped_reasoning = string_strip(reasoning_content); + + if (!stripped_reasoning.empty()) { + builder.add_reasoning_content(stripped_reasoning); + } + + // Move past the "" tag + builder.move_to(res->groups[0].end); + + // The rest is regular content + builder.add_content(builder.consume_rest()); + } else { + // If no "" tag found, treat everything as regular content + builder.add_content(builder.consume_rest()); + } +} + static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) { common_chat_params data; auto prompt = apply(tmpl, inputs); @@ -2100,6 +2132,12 @@ static common_chat_params common_chat_templates_apply_jinja( } } + // DeepSeek V3.1: detect based on specific patterns in the template + if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos && + params.json_schema.is_null()) { + return common_chat_params_init_deepseek_v3_1(tmpl, params); + } + // DeepSeek R1: use handler in all cases except json schema (thinking / tools). 
if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) { return common_chat_params_init_deepseek_r1(tmpl, params); @@ -2262,6 +2300,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) { case COMMON_CHAT_FORMAT_DEEPSEEK_R1: common_chat_parse_deepseek_r1(builder); break; + case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: + common_chat_parse_deepseek_v3_1(builder); + break; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: common_chat_parse_functionary_v3_2(builder); break; diff --git a/common/chat.h b/common/chat.h index d1e480c918..f7c31221cd 100644 --- a/common/chat.h +++ b/common/chat.h @@ -107,6 +107,7 @@ enum common_chat_format { COMMON_CHAT_FORMAT_FIREFUNCTION_V2, COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, COMMON_CHAT_FORMAT_HERMES_2_PRO, COMMON_CHAT_FORMAT_COMMAND_R7B, COMMON_CHAT_FORMAT_GRANITE,