mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
	examples : add tokenize (#4039)
Author: zakkor
examples/CMakeLists.txt

@@ -24,6 +24,7 @@ else()
     add_subdirectory(llama-bench)
     add_subdirectory(llava)
     add_subdirectory(main)
+    add_subdirectory(tokenize)
     add_subdirectory(parallel)
     add_subdirectory(perplexity)
     add_subdirectory(quantize)
examples/tokenize/CMakeLists.txt (new file, +5)
@@ -0,0 +1,5 @@
+set(TARGET tokenize)
+add_executable(${TARGET} tokenize.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
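With the subdirectory wired into examples/CMakeLists.txt above, the new target builds along with the rest of the tree. A minimal sketch of building just this example, assuming a standard out-of-tree CMake build of llama.cpp (the build/ directory name is a convention here, not part of this commit):

    cmake -B build
    cmake --build build --target tokenize
    # with the default llama.cpp layout, the binary lands under build/bin/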
examples/tokenize/tokenize.cpp (new file, +44)
@@ -0,0 +1,44 @@
+#include "common.h"
+#include "llama.h"
+
+#include <cmath>
+#include <cstdio>
+#include <string>
+#include <vector>
+
+int main(int argc, char ** argv) {
+    if (argc < 3 || argv[1][0] == '-') {
+        printf("usage: %s MODEL_PATH PROMPT [--ids]\n", argv[0]);
+        return 1;
+    }
+
+    auto model_path = argv[1];
+    auto prompt = argv[2];
+
+    const bool printing_ids = argc > 3 && std::string(argv[3]) == "--ids";
+
+    llama_backend_init(false);
+
+    llama_model_params model_params = llama_model_default_params();
+    model_params.vocab_only = true;
+    llama_model * model = llama_load_model_from_file(model_path, model_params);
+
+    llama_context_params ctx_params = llama_context_default_params();
+    llama_context * ctx = llama_new_context_with_model(model, ctx_params);
+
+    const bool add_bos = true;
+
+    std::vector<llama_token> tokens;
+
+    tokens = ::llama_tokenize(model, prompt, add_bos, true);
+
+    for (int i = 0; i < (int) tokens.size(); i++) {
+        if (printing_ids) {
+            printf("%d\n", tokens[i]);
+        } else {
+            printf("%6d -> '%s'\n", tokens[i], llama_token_to_piece(ctx, tokens[i]).c_str());
+        }
+    }
+
+    return 0;
+}
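Because the model is loaded with vocab_only = true, only the vocabulary is read, so a vocab-only GGUF file is enough to run the tool. A usage sketch (the model path is illustrative; actual token IDs depend on the model's vocabulary):

    # default mode: prints "%6d -> '%s'" per token, i.e. the ID and its decoded piece
    ./build/bin/tokenize ./models/ggml-vocab-llama.gguf "Hello world"

    # --ids mode: prints bare token IDs, one per line
    ./build/bin/tokenize ./models/ggml-vocab-llama.gguf "Hello world" --ids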