	server : add system_fingerprint to chat/completion (#10917)
* server : add system_fingerprint to chat/completion

* update README

Author: Xuan Son Nguyen

@@ -724,7 +724,8 @@ This endpoint is public (no API key check). By default, it is read-only. To make
   },
   "total_slots": 1,
   "model_path": "../models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
-  "chat_template": "..."
+  "chat_template": "...",
+  "build_info": "b(build number)-(build commit hash)"
 }
 ```

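The README change above documents the new `build_info` field returned by `/props`. As a quick sanity check, here is a minimal sketch (assuming a llama-server instance on the default `localhost:8080`) that reads the field with Python's `requests`:

```python
import requests

# /props is public (no API key check), so a plain GET is enough
res = requests.get("http://localhost:8080/props")
res.raise_for_status()

# build_info has the form "b<build number>-<build commit hash>"
print(res.json()["build_info"])
```
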
@@ -595,10 +595,11 @@ struct server_task_result_cmpl_final : server_task_result {
         std::time_t t = std::time(0);
 
         json res = json {
-            {"choices", json::array({choice})},
-            {"created", t},
-            {"model", oaicompat_model},
-            {"object", "chat.completion"},
+            {"choices",            json::array({choice})},
+            {"created",            t},
+            {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
+            {"object",             "chat.completion"},
             {"usage", json {
                 {"completion_tokens", n_decoded},
                 {"prompt_tokens",     n_prompt_tokens},
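With the change above, the final `chat.completion` object carries a `system_fingerprint` field, matching the OpenAI response schema; its value is the server's `build_info` string. A sketch of reading it over the raw HTTP API (the `localhost:8080` address and the `/v1/chat/completions` route are assumptions about a default local setup):

```python
import requests

payload = {
    "messages": [{"role": "user", "content": "Hello"}],
    "max_tokens": 8,
}
res = requests.post("http://localhost:8080/v1/chat/completions", json=payload)
res.raise_for_status()

body = res.json()
print(body["object"])              # "chat.completion"
print(body["system_fingerprint"])  # e.g. "b<build number>-<commit hash>"
```
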
@@ -632,11 +633,12 @@ struct server_task_result_cmpl_final : server_task_result {
         };
 
         json ret = json {
-            {"choices", json::array({choice})},
-            {"created", t},
-            {"id",      oaicompat_cmpl_id},
-            {"model",   oaicompat_model},
-            {"object",  "chat.completion.chunk"},
+            {"choices",            json::array({choice})},
+            {"created",            t},
+            {"id",                 oaicompat_cmpl_id},
+            {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
+            {"object",             "chat.completion.chunk"},
             {"usage", json {
                 {"completion_tokens", n_decoded},
                 {"prompt_tokens",     n_prompt_tokens},
@@ -761,11 +763,12 @@ struct server_task_result_cmpl_partial : server_task_result {
         }
 
         json ret = json {
-            {"choices", choices},
-            {"created", t},
-            {"id",      oaicompat_cmpl_id},
-            {"model",   oaicompat_model},
-            {"object",  "chat.completion.chunk"}
+            {"choices",            choices},
+            {"created",            t},
+            {"id",                 oaicompat_cmpl_id},
+            {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
+            {"object",             "chat.completion.chunk"}
         };
 
         if (timings.prompt_n >= 0) {
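The two hunks above add the same field to every streamed `chat.completion.chunk`, so partial results can also be tied to the exact server build. A sketch of consuming the SSE stream and checking each chunk (assuming the stream is terminated with a `data: [DONE]` line, as OpenAI-compatible servers do):

```python
import json
import requests

payload = {
    "messages": [{"role": "user", "content": "Hello"}],
    "max_tokens": 8,
    "stream": True,
}
with requests.post("http://localhost:8080/v1/chat/completions",
                   json=payload, stream=True) as res:
    for line in res.iter_lines():
        if not line.startswith(b"data: "):
            continue  # skip blank separator lines between events
        data = line[len(b"data: "):]
        if data == b"[DONE]":
            break
        chunk = json.loads(data)
        # every chunk carries the same build fingerprint
        assert chunk["system_fingerprint"].startswith("b")
```
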
@@ -3476,6 +3479,7 @@ int main(int argc, char ** argv) {
             { "total_slots",                 ctx_server.params_base.n_parallel },
             { "model_path",                  ctx_server.params_base.model },
             { "chat_template",               llama_get_chat_template(ctx_server.model) },
+            { "build_info",                  build_info },
         };
 
         res_ok(res, data);
@@ -31,6 +31,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_content
     })
     assert res.status_code == 200
     assert "cmpl" in res.body["id"] # make sure the completion id has the expected format
+    assert res.body["system_fingerprint"].startswith("b")
     assert res.body["model"] == model if model is not None else server.model_alias
     assert res.body["usage"]["prompt_tokens"] == n_prompt
     assert res.body["usage"]["completion_tokens"] == n_predicted
@@ -63,6 +64,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_content
     last_cmpl_id = None
     for data in res:
         choice = data["choices"][0]
+        assert data["system_fingerprint"].startswith("b")
         assert "gpt-3.5" in data["model"] # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
         if last_cmpl_id is None:
             last_cmpl_id = data["id"]
@@ -92,6 +94,7 @@ def test_chat_completion_with_openai_library():
         seed=42,
         temperature=0.8,
     )
+    assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b")
     assert res.choices[0].finish_reason == "length"
     assert res.choices[0].message.content is not None
     assert match_regex("(Suddenly)+", res.choices[0].message.content)
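The last test reads the field through the official `openai` Python client, which exposes it as `system_fingerprint` on the response object. Outside the test harness, equivalent standalone usage might look like the following sketch (the base URL and the placeholder API key are assumptions for a local server):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="sk-no-key-required")

res = client.chat.completions.create(
    model="gpt-3.5-turbo",  # the served model is used regardless of this name
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=8,
)
print(res.system_fingerprint)  # e.g. "b<build number>-<commit hash>"
```
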
@@ -56,6 +56,8 @@ static T json_value(const json & body, const std::string & key, const T & default_value)
     }
 }
 
+const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
+
 //
 // tokenizer and input processing utils
 //
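Finally, `build_info` itself is assembled once in the server utilities from the compile-time `LLAMA_BUILD_NUMBER` and `LLAMA_COMMIT` values, yielding strings like `b4300-abc1234`. A client-side sketch of splitting it back apart (the assumption that the commit part is a lowercase hex abbreviation is mine):

```python
import re

def parse_fingerprint(fp: str) -> tuple[int, str]:
    """Split a fingerprint like "b4300-abc1234" into (build number, commit)."""
    m = re.fullmatch(r"b(\d+)-([0-9a-f]+)", fp)
    if m is None:
        raise ValueError(f"unexpected fingerprint format: {fp!r}")
    return int(m.group(1)), m.group(2)

print(parse_fingerprint("b4300-abc1234"))  # -> (4300, 'abc1234')
```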