mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-29 08:41:22 +00:00 
			
		
		
		
	| @@ -412,7 +412,7 @@ node index.js | |||||||
|  |  | ||||||
|     `id_slot`: Assign the completion task to a specific slot. If it is -1, the task will be assigned to an idle slot.  Default: `-1` |     `id_slot`: Assign the completion task to a specific slot. If it is -1, the task will be assigned to an idle slot.  Default: `-1` | ||||||
|  |  | ||||||
|     `cache_prompt`: Re-use KV cache from a previous request if possible. This way the common prefix does not have to be re-processed, only the suffix that differs between the requests. Because (depending on the backend) the logits are **not** guaranteed to be bit-for-bit identical for different batch sizes (prompt processing vs. token generation), enabling this option can cause nondeterministic results. Default: `false` |     `cache_prompt`: Re-use KV cache from a previous request if possible. This way the common prefix does not have to be re-processed, only the suffix that differs between the requests. Because (depending on the backend) the logits are **not** guaranteed to be bit-for-bit identical for different batch sizes (prompt processing vs. token generation), enabling this option can cause nondeterministic results. Default: `true` | ||||||
|  |  | ||||||
|     `samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. Default: `["dry", "top_k", "typ_p", "top_p", "min_p", "xtc", "temperature"]` - these are all the available values. |     `samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. Default: `["dry", "top_k", "typ_p", "top_p", "min_p", "xtc", "temperature"]` - these are all the available values. | ||||||
|  |  | ||||||
|   | |||||||
| @@ -111,7 +111,7 @@ struct server_static_file { | |||||||
|  |  | ||||||
| struct slot_params { | struct slot_params { | ||||||
|     bool stream       = true; |     bool stream       = true; | ||||||
|     bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt |     bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt | ||||||
|  |  | ||||||
|     int32_t n_keep    =  0; // number of tokens to keep from initial prompt |     int32_t n_keep    =  0; // number of tokens to keep from initial prompt | ||||||
|     int32_t n_discard =  0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half |     int32_t n_discard =  0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half | ||||||
| @@ -883,7 +883,7 @@ struct server_context { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         slot.params.stream           = json_value(data, "stream",             false); |         slot.params.stream           = json_value(data, "stream",             false); | ||||||
|         slot.params.cache_prompt     = json_value(data, "cache_prompt",       false); |         slot.params.cache_prompt     = json_value(data, "cache_prompt",       true); | ||||||
|         slot.params.n_predict        = json_value(data, "n_predict",          json_value(data, "max_tokens", defaults.n_predict)); |         slot.params.n_predict        = json_value(data, "n_predict",          json_value(data, "max_tokens", defaults.n_predict)); | ||||||
|         slot.params.n_indent         = json_value(data, "n_indent",           defaults.n_indent); |         slot.params.n_indent         = json_value(data, "n_indent",           defaults.n_indent); | ||||||
|         slot.params.n_keep           = json_value(data, "n_keep",             defaults.n_keep); |         slot.params.n_keep           = json_value(data, "n_keep",             defaults.n_keep); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov