mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	llama : tokenizer fixes (#2549)
* Merge tokenizer fixes into the gguf branch.
* Add test vocabularies
This commit is contained in:
@@ -67,7 +67,7 @@ int main(int argc, char ** argv) {
         fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
         fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
         for (int i = 0; i < (int) embd_inp.size(); i++) {
-            fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
+            fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]).c_str());
         }
         fprintf(stderr, "\n");
     }
		Reference in New Issue
	
	Block a user