Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-30 08:42:00 +00:00.
			
		
		
		
	llama : tokenizer fixes (#2549)
* Merge tokenizer fixes into the gguf branch.
* Add test vocabularies.
This commit is contained in:
		| @@ -633,17 +633,6 @@ std::string gpt_random_prompt(std::mt19937 & rng) { | ||||
|     return "The"; | ||||
| } | ||||
|  | ||||
| // TODO: not great allocating this every time | ||||
| std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) { | ||||
|     // initialize to prompt numer of chars, since n_tokens <= n_prompt_chars | ||||
|     std::vector<llama_token> res(text.size() + (int) add_bos); | ||||
|     const int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos); | ||||
|     assert(n >= 0); | ||||
|     res.resize(n); | ||||
|  | ||||
|     return res; | ||||
| } | ||||
|  | ||||
| struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) { | ||||
|     auto lparams = llama_context_default_params(); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 goerch
					goerch