Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-30 08:42:00 +00:00.
			
		
		
		
	| @@ -631,6 +631,7 @@ struct server_context { | ||||
|  | ||||
|     bool clean_kv_cache = true; | ||||
|     bool add_bos_token  = true; | ||||
|     bool has_eos_token  = false; | ||||
|  | ||||
|     int32_t n_ctx; // total context for all clients / slots | ||||
|  | ||||
| @@ -693,7 +694,7 @@ struct server_context { | ||||
|         n_ctx = llama_n_ctx(ctx); | ||||
|  | ||||
|         add_bos_token = llama_should_add_bos_token(model); | ||||
|         GGML_ASSERT(llama_add_eos_token(model) != 1); | ||||
|         has_eos_token = llama_add_eos_token(model) != 1; | ||||
|  | ||||
|         return true; | ||||
|     } | ||||
| @@ -1031,7 +1032,7 @@ struct server_context { | ||||
|         { | ||||
|             slot.sparams.logit_bias.clear(); | ||||
|  | ||||
|             if (json_value(data, "ignore_eos", false)) { | ||||
|             if (json_value(data, "ignore_eos", false) && has_eos_token) { | ||||
|                 slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY; | ||||
|             } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov