llama : tokenizer fixes (#2549)

* Merge tokenizer fixes into the gguf branch.
* Add test vocabularies
2025-10-30 08:42:00 +00:00 · 2023-08-14 18:30:28 +02:00
parent 8af3a99ff1
commit ec1b100720
17 changed files with 612 additions and 147 deletions
--- a/examples/common.h
+++ b/examples/common.h
@@ -2,6 +2,7 @@

 #pragma once

+#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"

 #include <string>
@@ -100,12 +101,6 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params);

 std::string gpt_random_prompt(std::mt19937 & rng);

-//
-// Vocab utils
-//
-
-std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);
-
 //
 // Model utils
 //