Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
tool-call: command r7b fix for normal responses (#11608)

* fix command r7b normal response regex + add to server test
* test multiline non-tool-call responses in test-chat
@@ -365,7 +365,7 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
     return data;
 }
 static common_chat_msg common_chat_parse_command_r7b(const std::string & input) {
-    static std::regex response_regex("<\\|START_RESPONSE\\|>(.*?)<\\|END_RESPONSE\\|>");
+    static std::regex response_regex("<\\|START_RESPONSE\\|>([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");
     static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
     std::smatch match;
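Note on the fix: std::regex defaults to the ECMAScript grammar, in which "." does not match line terminators, so the old response_regex only matched responses that fit on a single line; the "[\s\S\n\r]" class matches any character, newlines included, which is what the thought/action pattern already used. The sketch below is a standalone illustration, not part of the patch, that applies the two patterns from this hunk to a multi-line response; it should build with any C++11 compiler (e.g. g++ -std=c++11 demo.cpp, file name hypothetical).

// Standalone illustration (not part of the patch): the old pattern fails on a
// multi-line response because '.' stops at '\n'; the new one matches across lines.
#include <iostream>
#include <regex>
#include <string>

int main() {
    const std::string input =
        "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>";

    const std::regex old_response_regex("<\\|START_RESPONSE\\|>(.*?)<\\|END_RESPONSE\\|>");
    const std::regex new_response_regex("<\\|START_RESPONSE\\|>([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");

    std::smatch match;
    std::cout << std::boolalpha;
    std::cout << "old pattern matches: " << std::regex_search(input, match, old_response_regex) << "\n"; // false
    std::cout << "new pattern matches: " << std::regex_search(input, match, new_response_regex) << "\n"; // true
    std::cout << match[1].str() << std::endl; // capture spans both lines of the response
    return 0;
}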
@@ -273,6 +273,7 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
 
 @pytest.mark.slow
 @pytest.mark.parametrize("hf_repo,template_override", [
+    ("bartowski/c4ai-command-r7b-12-2024-GGUF:Q4_K_M",   ("CohereForAI/c4ai-command-r7b-12-2024", "tool_use")),
     ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
     ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
@@ -303,12 +304,13 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
     # ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
     # ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
 ])
-def test_weather_tool_call(hf_repo: str, template_override: str | Tuple[str, str | None] | None):
+def test_weather(hf_repo: str, template_override: Tuple[str, str | None] | None):
     global server
+    n_predict = 512
     server.n_slots = 1
     server.jinja = True
     server.n_ctx = 8192
-    server.n_predict = 512
+    server.n_predict = n_predict
     server.model_hf_repo = hf_repo
     server.model_hf_file = None
     if isinstance(template_override, tuple):
@@ -319,7 +321,7 @@ def test_weather_tool_call(hf_repo: str, template_override: str | Tuple[str, str
         server.chat_template = template_override
     server.start(timeout_seconds=TIMEOUT_SERVER_START)
     res = server.make_request("POST", "/chat/completions", data={
-        "max_tokens": 256,
+        "max_tokens": n_predict,
         "messages": [
            {"role": "user", "content": "What is the weather in Istanbul?"},
         ],
@@ -289,7 +289,7 @@ static void test_template(const common_chat_template & tmpl, const std::vector<s
 static void test_template_output_parsers() {
     json text_message {
         { "role",    "assistant"     },
-        { "content", "Hello, world!" },
+        { "content", "Hello, world!\nWhat's up?" },
     };
     json tool_calls = json::array({{
         { "type", "function" },
@@ -379,7 +379,7 @@ static void test_template_output_parsers() {
 
     common_chat_inputs inputs_no_tools;
     inputs_no_tools.messages = {
-        { { "role", "user" }, { "content", "Hey" } }
+        { { "role", "user" }, { "content", "Hey\nThere" } }
     };
 
     common_chat_inputs inputs_tools = inputs_no_tools;
@@ -408,7 +408,8 @@ static void test_template_output_parsers() {
                       "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
                       "]<|END_ACTION|>");
         test_template(tmpl, end_tokens, text_message, tools,
-                      "<|START_RESPONSE|>Hello, world!<|END_RESPONSE|>",
+                      "<|START_RESPONSE|>Hello, world!\n"
+                      "What's up?<|END_RESPONSE|>",
                       /* expect_grammar_triggered= */ false);
     }
     {
@@ -428,7 +429,7 @@ static void test_template_output_parsers() {
 
         assert_msg_equals(msg_from_json(text_message),
                           common_chat_parse("{\n"
-                                            "  \"response\": \"Hello, world!\"\n"
+                                            "  \"response\": \"Hello, world!\\nWhat's up?\"\n"
                                             "}",
                                             common_chat_params_init(tmpl, inputs_tools).format));
         test_template(tmpl, end_tokens, tool_call_message_with_id, tools,
@@ -451,7 +452,7 @@ static void test_template_output_parsers() {
 
         assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_params_init(tmpl, inputs_tools).format);
 
-        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_template(
             tmpl, end_tokens, tool_call_message_with_id, tools,
             "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]");
@@ -476,7 +477,7 @@ static void test_template_output_parsers() {
                 inputs_tools)
                 .format);
 
-        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_template(tmpl, end_tokens, tool_call_message, tools,
                       "<tool_call>\n"
                       "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
@@ -516,7 +517,7 @@ static void test_template_output_parsers() {
 
         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_params_init(tmpl, inputs_tools).format);
 
-        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_template(tmpl, end_tokens, tool_call_message, tools,
                       "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
     }
@@ -528,7 +529,7 @@ static void test_template_output_parsers() {
         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
                       common_chat_params_init(tmpl, inputs_tools).format);
 
-        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_template(tmpl, end_tokens, tool_call_message, tools,
                       "<function=special_function>{\"arg1\": 1}</function>");
     }
@@ -542,7 +543,8 @@ static void test_template_output_parsers() {
 
         test_template(tmpl, end_tokens, text_message, {},
                       "all\n"
-                      "Hello, world!",
+                      "Hello, world!\n"
+                      "What's up?",
                       /* expect_grammar_triggered= */ false);
         test_template(tmpl, end_tokens, tool_call_message, tools,
                       "special_function\n"
@@ -555,7 +557,7 @@ static void test_template_output_parsers() {
 
         assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_params_init(tmpl, inputs_tools).format);
 
-        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_template(tmpl, end_tokens, tool_call_message, tools,
                       " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
     }
@@ -566,7 +568,7 @@ static void test_template_output_parsers() {
 
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format);
 
-        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_template(tmpl, end_tokens, tool_call_message, tools,
                       "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
                       "```json\n"
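The test-chat changes above put a newline into every plain (non-tool-call) assistant message so that each chat format's parser is exercised on multi-line content. As a rough standalone companion, again not the project's test harness but the Command R7B patterns from this commit applied by hand, the sketch below checks that the thinking/action pattern likewise captures a multi-line thought and a pretty-printed action array:

// Standalone sketch (not from the patch): the Command R7B thinking/action
// pattern already used [\s\S\n\r], so multi-line thoughts and multi-line
// JSON action arrays are captured across newlines.
#include <iostream>
#include <regex>
#include <string>

int main() {
    const std::string input =
        "<|START_THINKING|>First line of thought.\nSecond line.<|END_THINKING|>"
        "<|START_ACTION|>[\n"
        "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
        "]<|END_ACTION|>";

    const std::regex thought_action_regex(
        "<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|>"
        "<\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");

    std::smatch match;
    if (std::regex_search(input, match, thought_action_regex)) {
        std::cout << "thinking: " << match[1].str() << "\n"; // both lines of the thought
        std::cout << "action:   " << match[2].str() << "\n"; // the JSON array between the tags
    }
    return 0;
}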
Author: Olivier Chafik