Mirror of https://github.com/ggml-org/llama.cpp.git
	server : add system_fingerprint to chat/completion (#10917)
* server : add system_fingerprint to chat/completion
* update README
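For context, a minimal sketch of how a client could read the new field from a non-streaming response; the host, port, and request payload here are assumptions, while the field name comes from the diff below:

import requests  # assumed dependency

# POST a non-streaming chat completion to a locally running llama-server
# (host/port assumed); per this patch, the response body now carries
# "system_fingerprint" alongside "model", "created", etc.
res = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 8,
    },
)
print(res.json()["system_fingerprint"])  # "b<build number>-<build commit hash>"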
@@ -724,7 +724,8 @@ This endpoint is public (no API key check). By default, it is read-only. To make
   },
   "total_slots": 1,
   "model_path": "../models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
-  "chat_template": "..."
+  "chat_template": "...",
+  "build_info": "b(build number)-(build commit hash)"
 }
 ```
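In the same spirit, a small sketch of fetching the /props endpoint documented above and reading the new build_info field (host and port again assumed):

import requests  # assumed dependency

# /props is public (no API key check) per the README hunk above;
# build_info reports the server build as "b<build number>-<build commit hash>".
props = requests.get("http://localhost:8080/props").json()
print(props["build_info"])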
@@ -598,6 +598,7 @@ struct server_task_result_cmpl_final : server_task_result {
             {"choices",            json::array({choice})},
             {"created",            t},
             {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
             {"object",             "chat.completion"},
             {"usage", json {
                 {"completion_tokens", n_decoded},

@@ -636,6 +637,7 @@ struct server_task_result_cmpl_final : server_task_result {
             {"created",            t},
             {"id",                 oaicompat_cmpl_id},
             {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
             {"object",             "chat.completion.chunk"},
             {"usage", json {
                 {"completion_tokens", n_decoded},

@@ -765,6 +767,7 @@ struct server_task_result_cmpl_partial : server_task_result {
             {"created",            t},
             {"id",                 oaicompat_cmpl_id},
             {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
             {"object",             "chat.completion.chunk"}
         };

@@ -3476,6 +3479,7 @@ int main(int argc, char ** argv) {
             { "total_slots",                 ctx_server.params_base.n_parallel },
             { "model_path",                  ctx_server.params_base.model },
             { "chat_template",               llama_get_chat_template(ctx_server.model) },
+            { "build_info",                  build_info },
         };

         res_ok(res, data);
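As the three result-struct hunks above show, the fingerprint is attached to the final "chat.completion" object and to every "chat.completion.chunk"; a sketch of a streaming client that observes this (URL, payload, and SSE framing details are assumptions):

import json
import requests  # assumed dependency

with requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "Hi"}],
        "max_tokens": 8,
        "stream": True,
    },
    stream=True,
) as res:
    for line in res.iter_lines():
        # Server-sent events: payload lines look like "data: {...}"
        if line.startswith(b"data: ") and line != b"data: [DONE]":
            chunk = json.loads(line[len(b"data: "):])
            assert chunk["system_fingerprint"].startswith("b")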
@@ -31,6 +31,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
     })
     assert res.status_code == 200
     assert "cmpl" in res.body["id"] # make sure the completion id has the expected format
+    assert res.body["system_fingerprint"].startswith("b")
     assert res.body["model"] == model if model is not None else server.model_alias
     assert res.body["usage"]["prompt_tokens"] == n_prompt
     assert res.body["usage"]["completion_tokens"] == n_predicted

@@ -63,6 +64,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_conte
     last_cmpl_id = None
     for data in res:
         choice = data["choices"][0]
+        assert data["system_fingerprint"].startswith("b")
         assert "gpt-3.5" in data["model"] # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
         if last_cmpl_id is None:
             last_cmpl_id = data["id"]

@@ -92,6 +94,7 @@ def test_chat_completion_with_openai_library():
         seed=42,
         temperature=0.8,
     )
+    assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b")
     assert res.choices[0].finish_reason == "length"
     assert res.choices[0].message.content is not None
     assert match_regex("(Suddenly)+", res.choices[0].message.content)
@@ -56,6 +56,8 @@ static T json_value(const json & body, const std::string & key, const T & defaul
     }
 }
 
+const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
+
 //
 // tokenizer and input processing utils
 //
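Given the construction above ("b" + LLAMA_BUILD_NUMBER + "-" + LLAMA_COMMIT), a client could split the fingerprint back into build number and commit; this helper and its regex are illustrative assumptions, not part of the patch:

import re

def parse_fingerprint(fp: str) -> tuple[int, str]:
    # Expects the "b<build number>-<commit>" shape produced above.
    m = re.fullmatch(r"b(\d+)-(.+)", fp)
    if m is None:
        raise ValueError(f"unexpected fingerprint: {fp!r}")
    return int(m.group(1)), m.group(2)

build_number, commit = parse_fingerprint("b1234-abcdef0")  # example values
print(build_number, commit)  # 1234 abcdef0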
Author: Xuan Son Nguyen