mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	sampling : deduplicated code for probability distribution access (#6240)
* sampling: remove duplicated code for probability distribution access
* free original_logits
* fix original_logits allocation
* fixes based on review @cebtenzzre
* change function name to `llama_sampling_prepare`
This commit is contained in:
		| @@ -131,12 +131,14 @@ llama_token llama_sampling_sample( | ||||
|         struct llama_context * ctx_cfg, | ||||
|         int idx = 0); | ||||
|  | ||||
| // returns the probability that token of given id will be sampled | ||||
| llama_token_data_array llama_sampling_probability_distribution( | ||||
| // Prepares and adjusts the set of token candidates for sampling based on penalties, biases, and sampling parameters. | ||||
| llama_token_data_array llama_sampling_prepare( | ||||
|         struct llama_sampling_context * ctx_sampling, | ||||
|         struct llama_context * ctx_main, | ||||
|         struct llama_context * ctx_cfg, | ||||
|         int idx = 0); | ||||
|         int idx = 0, | ||||
|         bool apply_grammar = true, | ||||
|         std::vector<float> * original_logits = nullptr); | ||||
|  | ||||
| void llama_sampling_accept( | ||||
|         struct llama_sampling_context * ctx_sampling, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Minsoo Cheong
					Minsoo Cheong