chat : Seed OSS thinking + tool call support (#15552)

* Reasoning and tool-calling support for Seed OSS

* Fix grammar and partial parsing

* Whitespace

* New chat template

* Update common/chat.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update common/chat.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Remove unused 'purge_healing_marker' helper

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
Piotr Wilkin (ilintar)
2025-08-29 14:53:41 +02:00
committed by GitHub
parent 009b709d6e
commit 60e5eee31f
4 changed files with 458 additions and 1 deletions

View File

@@ -1621,6 +1621,140 @@ static void test_template_output_parsers() {
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
}));
}
{
// Seed-OSS format tests
auto tmpls = read_templates("models/templates/ByteDance-Seed-OSS.jinja");
std::vector<std::string> end_tokens{ "<seed:eos>" };
assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
// Test simple reasoning content
assert_msg_equals(
simple_assist_msg("Hello, world!", "I'm thinking about the answer"),
common_chat_parse(
"<seed:think>I'm thinking about the answer</seed:think>Hello, world!",
/* is_partial= */ false,
{
/* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
// Test budget reflection tags
common_chat_msg msg_budget_reflect;
msg_budget_reflect.role = "assistant";
msg_budget_reflect.content = "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>I need to calculate this step by step.";
msg_budget_reflect.reasoning_content = "Token usage: 45/1000\nI should continue thinking to find the best solution.";
assert_msg_equals(
msg_budget_reflect,
common_chat_parse(
"<seed:think>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:think>"
"<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>"
"I need to calculate this step by step.",
/* is_partial= */ false,
{
/* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
// Test tool calls with Seed-OSS format
common_chat_msg msg_tool_call;
msg_tool_call.role = "assistant";
msg_tool_call.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
assert_msg_equals(
msg_tool_call,
common_chat_parse(
"<seed:tool_call>\n"
"<function=calculate_sum>\n"
"<parameter=numbers>[1, 2, 3]</parameter>\n"
"</function>\n"
"</seed:tool_call>",
/* is_partial= */ false,
{COMMON_CHAT_FORMAT_SEED_OSS}));
// Test reasoning + tool call combination
common_chat_msg msg_reasoning_tool;
msg_reasoning_tool.role = "assistant";
msg_reasoning_tool.content = "";
msg_reasoning_tool.reasoning_content = "I need to calculate the sum of these numbers";
msg_reasoning_tool.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
assert_msg_equals(
msg_reasoning_tool,
common_chat_parse(
"<seed:think>I need to calculate the sum of these numbers</seed:think>"
"<seed:tool_call>\n"
"<function=calculate_sum>\n"
"<parameter=numbers>[1, 2, 3]</parameter>\n"
"</function>\n"
"</seed:tool_call>",
/* is_partial= */ false,
{
/* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
// Test deltas: the number of tool calls in partial parses should never decrease
std::string tool_msg = "<seed:tool_call>\n"
"<function=fun>\n"
"<parameter=smth>[1, 2, 3]</parameter>\n"
"</function>";
std::size_t previousToolCalls = 0;
for (std::size_t i = std::string("<seed:tool_call>").length(); i < tool_msg.length() - 1; i++) {
auto partial = tool_msg.substr(0, i);
auto partial_res = common_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK });
if (partial_res.tool_calls.size() < previousToolCalls) {
throw std::runtime_error("Tool call size decreased on partial: " + partial + " from " + std::to_string(previousToolCalls) + " to " + std::to_string(partial_res.tool_calls.size()));
}
previousToolCalls = partial_res.tool_calls.size();
}
// Test multiple parameters in tool call
common_chat_msg msg_multi_param;
msg_multi_param.role = "assistant";
msg_multi_param.tool_calls.push_back({"process_data", "{\"input\": \"test\", \"format\": \"json\"}", ""});
assert_msg_equals(
msg_multi_param,
common_chat_parse(
"<seed:tool_call>\n"
"<function=process_data>\n"
"<parameter=input>test</parameter>\n"
"<parameter=format>json</parameter>\n"
"</function>\n"
"</seed:tool_call>",
/* is_partial= */ false,
{COMMON_CHAT_FORMAT_SEED_OSS}));
// Test partial parsing for incomplete tool call - don't actually add the call until parsing parameters is done
assert_msg_equals(
simple_assist_msg("", ""),
common_chat_parse(
"<seed:tool_call>\n"
"<function=calculate_sum>\n"
"<parameter=numbers>[1,\n",
/* is_partial= */ true,
{COMMON_CHAT_FORMAT_SEED_OSS}));
// Test incomplete reasoning tag
assert_msg_equals(
simple_assist_msg("", "I was thinking"),
common_chat_parse(
"<seed:think>I was thinking",
/* is_partial= */ true,
{
/* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
// Test content without reasoning
assert_msg_equals(
simple_assist_msg("This is a simple response without reasoning."),
common_chat_parse(
"This is a simple response without reasoning.",
/* is_partial= */ false,
{COMMON_CHAT_FORMAT_SEED_OSS}));
}
}
static void test_msg_diffs_compute() {