Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-10-30 08:42:00 +00:00
			
		
		
		
	common : avoid unnecessary logits fetch (#8358)
This commit is contained in:
		| @@ -282,8 +282,6 @@ static llama_token llama_sampling_sample_impl( | ||||
|         GGML_ASSERT(!original_logits.empty()); | ||||
|     } | ||||
|     llama_token id = 0; | ||||
|     // Get a pointer to the logits | ||||
|     float * logits = llama_get_logits_ith(ctx_main, idx); | ||||
|  | ||||
|     if (temp < 0.0) { | ||||
|         // greedy sampling, with probs | ||||
| @@ -324,6 +322,9 @@ static llama_token llama_sampling_sample_impl( | ||||
|     } | ||||
|  | ||||
|     if (ctx_sampling->grammar != NULL && !is_resampling) { | ||||
|         // Get a pointer to the logits | ||||
|         float * logits = llama_get_logits_ith(ctx_main, idx); | ||||
|  | ||||
|         // Create an array with a single token data element for the sampled id | ||||
|         llama_token_data single_token_data = {id, logits[id], 0.0f}; | ||||
|         llama_token_data_array single_token_data_array = { &single_token_data, 1, false }; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user