Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-30 08:42:00 +00:00
	common : revert showing control tokens by default for server (#6860)
* fix: revert showing control tokens by default
* feat: revert changes to the default behavior of llama_token_to_piece; provide an overridden declaration that receives a "bool special" param to toggle showing control tokens
* feat: use the overridden declaration of llama_token_to_piece from common/common.cpp to specify "false" so that control tokens are not shown in chat completion responses
* common : simplify

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
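For context, a minimal sketch of what the overridden declaration in common/common.h and common/common.cpp could look like. Only the added "bool special" parameter is confirmed by the commit message; the default value, the buffer-growth logic, and the exact core llama.h signature shown here are assumptions:

#include "llama.h"  // core llama_token_to_piece(model, token, buf, length, special)
#include <string>
#include <vector>

// common/common.h (sketch): C++ overload taking a context instead of a model,
// plus a "special" flag that toggles rendering of control tokens.
// The default of true is an assumption.
std::string llama_token_to_piece(
        const struct llama_context * ctx,
                       llama_token   token,
                       bool          special = true);

// common/common.cpp (sketch): forward to the C API; a negative return value
// is assumed to encode the required buffer size, so retry once after resizing.
std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
    std::vector<char> result(8, 0);
    const int n_chars = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
    if (n_chars < 0) {
        result.resize(-n_chars);
        const int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
        GGML_ASSERT(check == -n_chars); // GGML_ASSERT comes from ggml.h
    } else {
        result.resize(n_chars);
    }
    return std::string(result.data(), result.size());
}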
@@ -1117,7 +1117,7 @@ struct server_context {

     bool process_token(completion_token_output & result, server_slot & slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = llama_token_to_piece(ctx, result.tok);
+        const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
         slot.sampled = result.tok;

         // search stop word and delete it
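With the overload in place, each call site picks the behavior it needs; the server's process_token above passes false so control tokens never leak into chat completion responses. A hypothetical pair of call sites (variable names are illustrative, not from the commit):

// Hide control tokens in user-facing output, render them for debugging.
const std::string user_facing = llama_token_to_piece(ctx, tok, false);
const std::string debug_view  = llama_token_to_piece(ctx, tok, true); // e.g. shows "<|eot_id|>"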
Kyle Mistele