Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-10-30 08:42:00 +00:00
			
		
		
		
	speculative : do not discard the last drafted token
This commit is contained in:
		| @@ -274,7 +274,7 @@ struct server_task { | ||||
|         params.speculative.p_min = json_value(data, "speculative.p_min", defaults.speculative.p_min); | ||||
|  | ||||
|         params.speculative.n_min = std::min(params.speculative.n_max, params.speculative.n_min); | ||||
|         params.speculative.n_min = std::max(params.speculative.n_min, 2); | ||||
|         params.speculative.n_min = std::max(params.speculative.n_min, 0); | ||||
|         params.speculative.n_max = std::max(params.speculative.n_max, 0); | ||||
|  | ||||
|         // Use OpenAI API logprobs only if n_probs wasn't provided | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov