mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	server : send token probs for "stream == false" (#4714)
This commit is contained in:
@@ -1325,7 +1325,7 @@ struct llama_server_context
             {
                 probs = std::vector<completion_token_output>(
                                     slot.generated_token_probs.begin(),
-                                    slot.generated_token_probs.begin() + slot.sent_token_probs_index);
+                                    slot.generated_token_probs.end());
             }
             res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs);
         }
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov