mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	Merge branch 'master' into compilade/refactor-kv-cache
This commit is contained in:
		| @@ -229,7 +229,7 @@ private func tokenize(text: String, add_bos: Bool) -> [llama_token] { | ||||
|  | ||||
| private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String? { | ||||
|     var result = [CChar](repeating: 0, count: 8) | ||||
|     let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count), false) | ||||
|     let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count), 0, false) | ||||
|     if nTokens < 0 { | ||||
|         let actualTokensCount = -Int(nTokens) | ||||
|         result = .init(repeating: 0, count: actualTokensCount) | ||||
| @@ -238,6 +238,7 @@ private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String | ||||
|             token, | ||||
|             &result, | ||||
|             Int32(result.count), | ||||
|             0, | ||||
|             false | ||||
|         ) | ||||
|         assert(check == actualTokensCount) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Francis Couture-Harpin
					Francis Couture-Harpin