llama : tokenizer fixes (#2549)

* Merge tokenizer fixes into the gguf branch.

* Add test vocabularies
This commit is contained in:
goerch
2023-08-14 18:30:28 +02:00
committed by GitHub
parent 8af3a99ff1
commit ec1b100720
17 changed files with 612 additions and 147 deletions

View File

@@ -2,6 +2,7 @@
#pragma once
#define LLAMA_API_CPP // TODO: eliminate me
#include "llama.h"
#include <string>
@@ -100,12 +101,6 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
// Returns a prompt as a std::string. The caller's std::mt19937 engine is taken
// by non-const reference, so the call may advance its state.
// NOTE(review): presumably the engine is used to pick the prompt at random;
// the implementation is not visible here - confirm.
std::string gpt_random_prompt(std::mt19937 & rng);
//
// Vocab utils
//
// C++ convenience declaration: takes a llama_context pointer and the input text,
// and returns the resulting tokens by value as a std::vector<llama_token>.
// NOTE(review): add_bos presumably controls whether a beginning-of-sequence
// token is prepended to the result - confirm against the implementation.
std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);
//
// Model utils
//