Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
			
		
		
		
	server : pass default --keep argument (#14120)
This commit is contained in:
```diff
@@ -233,6 +233,7 @@ struct server_task {
         slot_params defaults;
         defaults.sampling    = params_base.sampling;
         defaults.speculative = params_base.speculative;
+        defaults.n_keep      = params_base.n_keep;
 
         // enabling this will output extra debug information in the HTTP responses from the server
         params.verbose           = params_base.verbosity > 9;
@@ -2060,6 +2061,7 @@ struct server_context {
             SLT_INF(slot, "new slot n_ctx_slot = %d\n", slot.n_ctx);
 
             slot.params.sampling = params_base.sampling;
+            slot.params.n_keep = params_base.n_keep;
 
             slot.callback_on_release = [this](int) {
                 queue_tasks.pop_deferred_task();
```
		Reference in New Issue
	
	Block a user
	 Taylor
					Taylor