mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	fix server sampling: top k sampler first (#1977)
Co-authored-by: anon <anon@example.org>
This commit is contained in:
		| @@ -325,10 +325,10 @@ struct llama_server_context { | ||||
|                     id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu); | ||||
|                 } else { | ||||
|                     // Temperature sampling | ||||
| +                   llama_sample_top_k(ctx, &candidates_p, top_k, 1); | ||||
|                     llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1); | ||||
|                     llama_sample_typical(ctx, &candidates_p, typical_p, 1); | ||||
|                     llama_sample_top_p(ctx, &candidates_p, top_p, 1); | ||||
| -                   llama_sample_top_k(ctx, &candidates_p, top_k, 1); | ||||
|                     llama_sample_temperature(ctx, &candidates_p, temp); | ||||
|                     id = llama_sample_token(ctx, &candidates_p); | ||||
|                 } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 anon998
					anon998