mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
chat: Fix streaming parser for granite models (#15682)
* fix(chat): fix streaming parser for granite models
* tests: add test cases for Granite models chat parser
This commit is contained in:
@@ -2232,15 +2232,28 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
|
||||
|
||||
static void common_chat_parse_granite(common_chat_msg_parser & builder) {
|
||||
// Parse thinking tags
|
||||
static const common_regex start_think_regex(regex_escape("<think>"));
|
||||
static const common_regex end_think_regex(regex_escape("</think>"));
|
||||
// Granite models output partial tokens such as "<" and "<think".
|
||||
// By leveraging try_consume_regex()/try_find_regex() throwing
|
||||
// common_chat_msg_partial_exception for these partial tokens,
|
||||
// processing is interrupted and the tokens are not passed to add_content().
|
||||
if (auto res = builder.try_consume_regex(start_think_regex)) {
|
||||
// Restore position for try_parse_reasoning()
|
||||
builder.move_to(res->groups[0].begin);
|
||||
builder.try_find_regex(end_think_regex, std::string::npos, false);
|
||||
// Restore position for try_parse_reasoning()
|
||||
builder.move_to(res->groups[0].begin);
|
||||
}
|
||||
builder.try_parse_reasoning("<think>", "</think>");
|
||||
|
||||
// Parse response tags using regex
|
||||
static const common_regex response_regex("<response>([\\s\\S]*?)</response>");
|
||||
if (auto res = builder.try_find_regex(response_regex)) {
|
||||
// Extract the content between the tags (capture group 1)
|
||||
auto content = builder.str(res->groups[1]);
|
||||
builder.add_content(content);
|
||||
builder.move_to(res->groups[0].end);
|
||||
// Parse response tags
|
||||
static const common_regex start_response_regex(regex_escape("<response>"));
|
||||
static const common_regex end_response_regex(regex_escape("</response>"));
|
||||
// Granite models output partial tokens such as "<" and "<response".
|
||||
// Same hack as reasoning parsing.
|
||||
if (builder.try_consume_regex(start_response_regex)) {
|
||||
builder.try_find_regex(end_response_regex);
|
||||
}
|
||||
|
||||
if (!builder.syntax().parse_tool_calls) {
|
||||
@@ -2254,13 +2267,10 @@ static void common_chat_parse_granite(common_chat_msg_parser & builder) {
|
||||
builder.move_to(res->groups[0].end);
|
||||
|
||||
// Expect JSON array of tool calls
|
||||
auto tool_calls_data = builder.consume_json();
|
||||
if (tool_calls_data.json.is_array()) {
|
||||
if (!builder.add_tool_calls(tool_calls_data.json)) {
|
||||
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
|
||||
if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
|
||||
if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
|
||||
throw common_chat_msg_partial_exception("incomplete tool call");
|
||||
}
|
||||
} else {
|
||||
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
|
||||
}
|
||||
} else {
|
||||
builder.add_content(builder.consume_rest());
|
||||
|
||||
@@ -1402,6 +1402,12 @@ static void test_template_output_parsers() {
|
||||
"Hello, world!\nWhat's up?",
|
||||
/* is_partial= */ false,
|
||||
{COMMON_CHAT_FORMAT_GRANITE}));
|
||||
assert_msg_equals(
|
||||
message_assist,
|
||||
common_chat_parse(
|
||||
"Hello, world!\nWhat's up?",
|
||||
/* is_partial= */ true,
|
||||
{COMMON_CHAT_FORMAT_GRANITE}));
|
||||
|
||||
// Test parsing content with thinking
|
||||
assert_msg_equals(message_assist_thoughts,
|
||||
@@ -1412,6 +1418,59 @@ static void test_template_output_parsers() {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
|
||||
/* is_partial= */ false,
|
||||
{COMMON_CHAT_FORMAT_GRANITE}));
|
||||
assert_msg_equals(message_assist_thoughts,
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts,
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>"),
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
|
||||
/* is_partial= */ false,
|
||||
{COMMON_CHAT_FORMAT_GRANITE}));
|
||||
assert_msg_equals(message_assist_empty,
|
||||
common_chat_parse(
|
||||
"<think",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
}));
|
||||
assert_msg_equals(message_assist_empty,
|
||||
common_chat_parse(
|
||||
"<think",
|
||||
/* is_partial= */ true,
|
||||
{COMMON_CHAT_FORMAT_GRANITE}));
|
||||
assert_msg_equals(message_assist_thoughts_no_content,
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
}));
|
||||
assert_msg_equals(
|
||||
message_assist_empty,
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think><response",
|
||||
/* is_partial= */ true,
|
||||
{COMMON_CHAT_FORMAT_GRANITE}));
|
||||
|
||||
// Test parsing tool calls
|
||||
assert_msg_equals(message_assist_call,
|
||||
@@ -1419,6 +1478,38 @@ static void test_template_output_parsers() {
|
||||
"<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]",
|
||||
/* is_partial= */ false,
|
||||
{COMMON_CHAT_FORMAT_GRANITE}));
|
||||
assert_msg_equals(
|
||||
message_assist_call_empty_args,
|
||||
common_chat_parse(
|
||||
"<|tool_call|>[{\"name\": \"special_function\"",
|
||||
/* is_partial= */ true,
|
||||
{COMMON_CHAT_FORMAT_GRANITE}));
|
||||
assert_msg_equals(
|
||||
message_assist_call_cutoff_args,
|
||||
common_chat_parse(
|
||||
"<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
|
||||
/* is_partial= */ true,
|
||||
{COMMON_CHAT_FORMAT_GRANITE}));
|
||||
assert_msg_equals(
|
||||
message_assist_call_cutoff_args,
|
||||
common_chat_parse(
|
||||
"<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
}));
|
||||
|
||||
// Test parsing tool calls with thinking
|
||||
assert_msg_equals(
|
||||
message_assist_call_thoughts,
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think><|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
}));
|
||||
|
||||
// Test template generation for regular content
|
||||
test_templates(tmpls.get(), end_tokens, message_assist, tools,
|
||||
|
||||
Reference in New Issue
Block a user