llama : add API for token type

ggml-ci
This commit is contained in:
Georgi Gerganov
2023-08-21 19:35:31 +03:00
parent 8d177eddeb
commit 0b53b8b08d
6 changed files with 115 additions and 116 deletions

View File

@@ -139,14 +139,16 @@ void print_sample_weights(TransformerWeights *w){
struct llama_vocab {
using id = int32_t;
using token = std::string;
using ttype = llama_token_type;
struct token_score {
token tok;
struct token_data {
token text;
float score;
ttype type;
};
std::unordered_map<token, id> token_to_id;
std::vector<token_score> id_to_token;
std::vector<token_data> id_to_token;
};
struct my_llama_hparams {
@@ -516,36 +518,30 @@ void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab)
struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params);
struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
std::vector<const char *> strings;
std::vector<float> scores;
int n_vocab = llama_n_vocab(lctx);
strings.resize(n_vocab, NULL);
scores.resize(n_vocab, 0);
n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
const int n_vocab = llama_n_vocab(lctx);
vocab->id_to_token.resize(n_vocab);
for (int i=0; i<n_vocab; ++i) {
std::string tok = std::string(strings[i]);
float score = scores[i];
vocab->id_to_token[i].tok = tok;
vocab->id_to_token[i].score = score;
vocab->token_to_id.emplace(tok, i);
vocab->id_to_token[i].text = llama_token_get_text(lctx, i);
vocab->id_to_token[i].score = llama_token_get_score(lctx, i);
vocab->id_to_token[i].type = llama_token_get_type(lctx, i);
vocab->token_to_id.emplace(vocab->id_to_token[i].text, i);
}
llama_free(lctx);
llama_free_model(lmodel);
} else { // assume llama2.c vocabulary
printf("Assuming llama2.c vocabulary since %s is not a ggml file\n", filename);
llama_file file(filename, "rb");
uint32_t n_vocab = config->vocab_size;
const int n_vocab = config->vocab_size;
/* uint32_t max_token_length = */ file.read_u32(); // unused
vocab->id_to_token.resize(n_vocab);
for (uint32_t i=0; i<n_vocab; ++i) {
for (int i=0; i<n_vocab; ++i) {
float_t score = file.read_f32();
uint32_t len = file.read_u32();
std::string tok = file.read_string(len);
vocab->id_to_token[i].tok = tok;
std::string text = file.read_string(len);
vocab->id_to_token[i].text = text;
vocab->id_to_token[i].score = score;
vocab->token_to_id.emplace(tok, i);
vocab->id_to_token[i].type = LLAMA_TOKEN_TYPE_UNDEFINED;
vocab->token_to_id.emplace(text, i);
}
}
}
@@ -611,10 +607,10 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
// // write_vocab - for now we are just writing the existing BPE voc. assuming karpathy's vocabulary is the same. idk.
// uint32_t n_vocab = model->hparams.n_vocab;
// for (uint32_t i = 0; i < n_vocab; i++) {
// const auto & token_score = vocab->id_to_token.at(i);
// file.write_u32((uint32_t) token_score.tok.size());
// file.write_raw(token_score.tok.data(), token_score.tok.size());
// file.write_raw(&token_score.score, sizeof(token_score.score));
// const auto & token_data = vocab->id_to_token.at(i);
// file.write_u32((uint32_t) token_data.tok.size());
// file.write_raw(token_data.tok.data(), token_data.tok.size());
// file.write_raw(&token_data.score, sizeof(token_data.score));
// }
//
// // stuff AK weights into GG weights one by one.