llama : tokenizer fixes (#2549)

* Merge tokenizer fixes into the gguf branch.

* Add test vocabularies
This commit is contained in:
goerch
2023-08-14 18:30:28 +02:00
committed by GitHub
parent 8af3a99ff1
commit ec1b100720
17 changed files with 612 additions and 147 deletions

View File

@@ -62,7 +62,7 @@ int main(int argc, char ** argv) {
fprintf(stderr, "\n\n");
for (auto id : tokens_list) {
-fprintf(stderr, "%s", llama_token_to_str(ctx, id));
+fprintf(stderr, "%s", llama_token_to_str(ctx, id).c_str());
}
fflush(stderr);
@@ -109,7 +109,7 @@ int main(int argc, char ** argv) {
}
// print the new token :
-printf("%s", llama_token_to_str(ctx, new_token_id));
+printf("%s", llama_token_to_str(ctx, new_token_id).c_str());
fflush(stdout);
// push this new token for next evaluation