llama.cpp: fix warning message (#11839)

There was a typo-like error that printed the same number twice when a request arrived with n_predict greater than the server-side configuration. Before the fix:
```
slot launch_slot_: id 0 | task 0 | n_predict = 4096 exceeds server configuration, setting to 4096
```
After the fix:
```
slot launch_slot_: id 0 | task 0 | n_predict = 8192 exceeds server configuration, setting to 4096
```
This commit is contained in:
Oleksandr Kuvshynov, committed by GitHub
parent 3e69319772
commit e4376270d9
```diff
@@ -2073,8 +2073,8 @@ struct server_context {

         if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) {
             // Might be better to reject the request with a 400 ?
+            SLT_WRN(slot, "n_predict = %d exceeds server configuration, setting to %d", slot.params.n_predict, slot.n_predict);
             slot.params.n_predict = slot.n_predict;
-            SLT_WRN(slot, "n_predict = %d exceeds server configuration, setting to %d", slot.n_predict, slot.n_predict);
         }

         if (slot.params.ignore_eos && has_eos_token) {
```
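For illustration, here is a minimal standalone sketch of the bug pattern the commit fixes: logging after clamping, so both format arguments hold the same clamped value. The variable names `requested` and `limit` are hypothetical stand-ins for `slot.params.n_predict` (the client's request) and `slot.n_predict` (the server-side cap), not names from the codebase.
```cpp
#include <cstdio>

int main() {
    int requested   = 8192; // stand-in for slot.params.n_predict (hypothetical name)
    const int limit = 4096; // stand-in for slot.n_predict (hypothetical name)

    if (limit > 0 && requested > limit) {
        // Buggy order: clamp first, then log. After the assignment the
        // original request value is gone, so the warning would print
        // "n_predict = 4096 exceeds server configuration, setting to 4096".
        //
        //     requested = limit;
        //     printf("n_predict = %d exceeds server configuration, setting to %d\n",
        //            requested, limit);

        // Fixed order: log the original request before overwriting it,
        // which prints "n_predict = 8192 exceeds server configuration, setting to 4096".
        printf("n_predict = %d exceeds server configuration, setting to %d\n",
               requested, limit);
        requested = limit;
    }
    return 0;
}
```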