Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
			
		
		
		
	server : send token probs for "stream == false" (#4714)
This commit is contained in:
		| @@ -1265,7 +1265,7 @@ struct llama_server_context | |||||||
|         { |         { | ||||||
|             std::vector<completion_token_output> probs_output = {}; |             std::vector<completion_token_output> probs_output = {}; | ||||||
|             const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false); |             const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false); | ||||||
|             size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size()); |             size_t probs_pos      = std::min(slot.sent_token_probs_index,                       slot.generated_token_probs.size()); | ||||||
|             size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size()); |             size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size()); | ||||||
|             if (probs_pos < probs_stop_pos) |             if (probs_pos < probs_stop_pos) | ||||||
|             { |             { | ||||||
| @@ -1325,7 +1325,7 @@ struct llama_server_context | |||||||
|             { |             { | ||||||
|                 probs = std::vector<completion_token_output>( |                 probs = std::vector<completion_token_output>( | ||||||
|                                     slot.generated_token_probs.begin(), |                                     slot.generated_token_probs.begin(), | ||||||
|                                     slot.generated_token_probs.begin() + slot.sent_token_probs_index); |                                     slot.generated_token_probs.end()); | ||||||
|             } |             } | ||||||
|             res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs); |             res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs); | ||||||
|         } |         } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov