	server : fix system prompt cli (#5516)
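Summary of the change (as visible in the diff below, which touches struct llama_server_context in the server example, examples/server/server.cpp): update_system_prompt() now clears the KV cache and the cached system tokens up front and only tokenizes, batches and copies the prompt into the other sequences when system_prompt is non-empty, and system_prompt_process() drops the slots.size() > 0 guard around notify_system_prompt_changed(), so, per the commit title, a system prompt supplied via the CLI is applied as well.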
@@ -436,10 +436,6 @@ struct llama_server_context
         default_generation_settings_for_props["seed"] = -1;
 
         batch = llama_batch_init(n_ctx, 0, params.n_parallel);
-
-        // empty system prompt
-        system_prompt = "";
-        system_tokens.clear();
     }
 
     std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
@@ -765,12 +761,14 @@ struct llama_server_context
     }
 
     void update_system_prompt() {
+        kv_cache_clear();
+        system_tokens.clear();
+
+        if (!system_prompt.empty()) {
             system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
 
             llama_batch_clear(batch);
 
-        kv_cache_clear();
-
             for (int i = 0; i < (int)system_tokens.size(); ++i)
             {
                 llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
@@ -787,6 +785,7 @@ struct llama_server_context
             {
                 llama_kv_cache_seq_cp(ctx, 0, i, 0, system_tokens.size());
             }
+        }
 
         LOG_TEE("system prompt updated\n");
         system_need_update = false;
@@ -807,11 +806,9 @@ struct llama_server_context
         name_user      = sys_props.value("anti_prompt", "");
         name_assistant = sys_props.value("assistant_name", "");
 
-        if (slots.size() > 0)
-        {
         notify_system_prompt_changed();
     }
-    }
 
     static size_t find_stopping_strings(const std::string &text, const size_t last_token_size,
                                         const stop_type type, llama_client_slot &slot)
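For orientation, the sketch below condenses the post-patch control flow into a minimal, self-contained C++ program: state is reset first, and tokenization/caching only happens for a non-empty prompt. The struct, the token type, the tokenize() helper and the kv_cache vector are hypothetical stand-ins for the real llama.cpp state and API shown in the diff, and the deferred notify/update mechanism is collapsed into a direct call; this is not the actual server implementation.

// Minimal, self-contained sketch of the post-patch control flow. The real
// llama.cpp calls (kv_cache_clear(), ::llama_tokenize(), the batch/decode
// machinery) are replaced by toy stand-ins so this compiles on its own.
#include <cstdio>
#include <string>
#include <vector>

using token = int;

struct server_sketch {
    std::string        system_prompt;
    std::vector<token> system_tokens;
    std::vector<token> kv_cache;   // stand-in for the model's KV cache

    // stand-in for ::llama_tokenize(ctx, system_prompt, add_bos_token)
    static std::vector<token> tokenize(const std::string & text) {
        return std::vector<token>(text.begin(), text.end());
    }

    void update_system_prompt() {
        // As in the patch: reset state unconditionally, so an empty prompt
        // clears whatever system prompt was cached before.
        kv_cache.clear();          // kv_cache_clear()
        system_tokens.clear();

        if (!system_prompt.empty()) {
            // Only tokenize and refill the cache when there is a prompt.
            system_tokens = tokenize(system_prompt);
            kv_cache      = system_tokens;   // stand-in for batching + decoding
        }

        std::printf("system prompt updated: %zu tokens cached\n", kv_cache.size());
    }

    void system_prompt_process(const std::string & prompt) {
        system_prompt = prompt;
        // As in the patch: no slots.size() > 0 guard, the update always runs.
        update_system_prompt();
    }
};

int main() {
    server_sketch srv;
    srv.system_prompt_process("You are a helpful assistant.");  // set and cache
    srv.system_prompt_process("");  // an empty prompt now clears the cached state
    return 0;
}

The ordering is the key point: state is cleared in every case, but tokenization and decoding are skipped entirely when the prompt is empty.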
Author: Rőczey Barnabás