mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	samplers : Min-P sampler implementation [alternative to Top P/Top K] (#3841)
* Introduce the new Min-P sampler by @kalomaze

  The Min-P sampling method was designed as an alternative to Top-P, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token.

* Min-P enabled and set to 0.05 by default

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: cebtenzzre <cebtenzzre@gmail.com>
This commit is contained in:
		
							
								
								
									
										26
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -7368,6 +7368,32 @@ void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * can | ||||
|     } | ||||
| } | ||||
|  | ||||
| void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep) { /* Min-P sampler: shrinks candidates->size so that only the leading entries whose probability is at least p times the first entry's probability remain; never keeps fewer than max(1, min_keep) entries unless the array is shorter. */ | ||||
|     if (p <= 0.0f || !candidates->size) { /* non-positive p or an empty candidate list: leave candidates untouched */ | ||||
|         return; | ||||
|     } | ||||
|     /* NOTE(review): the code below reads data[i].p and treats data[0] as the most likely token, so llama_sample_softmax presumably fills .p and orders candidates by descending probability - confirm against its definition. */ | ||||
|     llama_sample_softmax(ctx, candidates); /* called before the ctx null check below, so it presumably tolerates ctx == NULL itself - confirm */ | ||||
|     /* Timing starts after the softmax call, so only the filtering below is added to ctx->t_sample_us by this function. */ | ||||
|     const int64_t t_start_sample_us = ggml_time_us(); | ||||
|  | ||||
|     float scale = candidates->data[0].p; // scale by max prob: the cutoff is relative (p * scale), with data[0].p taken as the largest probability | ||||
|     size_t i = 1; // first token always matches (its probability equals scale), so the scan starts at index 1 | ||||
|     /* Scan forward until the first entry that is both below the cutoff and beyond the min_keep quota; i ends up as the number of entries kept. */ | ||||
|     for (; i < candidates->size; ++i) { | ||||
|         if (candidates->data[i].p < p * scale && i >= min_keep) { /* i >= min_keep means entries 0..i-1 already satisfy the minimum */ | ||||
|             break; // prob too small; entries from index i onward are cut off | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Resize the output vector to keep only the matching tokens; only the size field is changed here, the data entries themselves are not written by this function | ||||
|     candidates->size = i; | ||||
|  | ||||
|     if (ctx) { /* ctx may be NULL; timing is only recorded when a context is supplied */ | ||||
|         ctx->t_sample_us += ggml_time_us() - t_start_sample_us; | ||||
|     } | ||||
| } | ||||
|  | ||||
| void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep) { | ||||
|     if (z >= 1.0f || candidates->size <= 2) { | ||||
|         return; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 kalomaze
					kalomaze