Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-29 08:41:22 +00:00)
	Server: Don't ignore llama.cpp params (#8754)
* Don't ignore llama.cpp params
* Add fallback for max_tokens
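The nested json_value call in the diff below makes the OpenAI-style max_tokens field act as a fallback for n_predict when the latter is absent from the request. The following is a minimal, self-contained sketch of that pattern, assuming a json_value helper that returns the supplied default when the key is missing or null; the helper, request objects, and default value here are illustrative, not the server's actual code.

#include <cstdio>
#include <string>

#include <nlohmann/json.hpp>

using json = nlohmann::json;

// Illustrative json_value-style helper (an assumption, not the server's code):
// return data[key] if it is present and non-null, otherwise the default value.
template <typename T>
static T json_value(const json & data, const std::string & key, const T & default_value) {
    if (data.contains(key) && !data.at(key).is_null()) {
        return data.at(key).get<T>();
    }
    return default_value;
}

int main() {
    const int default_n_predict = -1; // placeholder default, not the real server default

    // OpenAI-style request body: only "max_tokens" is set.
    json oai_req    = { {"max_tokens", 128} };
    // Request that sets both fields: the explicit "n_predict" should win.
    json native_req = { {"n_predict", 64}, {"max_tokens", 128} };

    // Nested call: prefer "n_predict", fall back to "max_tokens", then to the default.
    int n1 = json_value(oai_req,    "n_predict", json_value(oai_req,    "max_tokens", default_n_predict));
    int n2 = json_value(native_req, "n_predict", json_value(native_req, "max_tokens", default_n_predict));

    std::printf("oai_req    -> n_predict = %d\n", n1); // 128: fallback to max_tokens
    std::printf("native_req -> n_predict = %d\n", n2); // 64:  explicit n_predict wins
    return 0;
}

With the nested call an explicit n_predict still takes precedence; max_tokens is only consulted when n_predict is not set, and the built-in default applies when neither field is present.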
@@ -900,7 +900,7 @@ struct server_context {
 
         slot.params.stream             = json_value(data, "stream",            false);
         slot.params.cache_prompt       = json_value(data, "cache_prompt",      false);
-        slot.params.n_predict          = json_value(data, "n_predict",         default_params.n_predict);
+        slot.params.n_predict          = json_value(data, "n_predict",         json_value(data, "max_tokens", default_params.n_predict));
         slot.sparams.top_k             = json_value(data, "top_k",             default_sparams.top_k);
         slot.sparams.top_p             = json_value(data, "top_p",             default_sparams.top_p);
         slot.sparams.min_p             = json_value(data, "min_p",             default_sparams.min_p);
ardfork