mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	 16bc66d947
			
		
	
	16bc66d947
	
	
	
		
			
			* llama.cpp : split llama_context_params into model and context params ggml-ci * fix metal build * fix freq_base/scale default to model value * llama-bench : keep the same model between tests when possible * move n_threads to llama_context_params, add n_threads_batch * fix mpi build * remove kv_size(), cuda scratch fixes * remove low-vram option * add n_threads_batch to system info, refactor to get_system_info() * add documentation about --threads-batch to the READMEs * llama-bench fix * main : fix rope freq/scale warning * llama.cpp : add llama_get_model common : add llama_tokenize from model * remove duplicated ctx/model functions ggml-ci * cuda : print total VRAM used
		
			
				
	
	
		
			36 lines
		
	
	
		
			969 B
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			36 lines
		
	
	
		
			969 B
		
	
	
	
		
			C++
		
	
	
	
	
	
| #include "embd-input.h"
 | |
| #include <stdlib.h>
 | |
| #include <random>
 | |
| #include <string.h>
 | |
| 
 | |
| int main(int argc, char** argv) {
 | |
| 
 | |
|     auto mymodel = create_mymodel(argc, argv);
 | |
|     int N = 10;
 | |
|     int max_tgt_len = 500;
 | |
|     int n_embd = llama_n_embd(llama_get_model(mymodel->ctx));
 | |
| 
 | |
|     // add random float embd to test evaluation
 | |
|     float * data = new float[N*n_embd];
 | |
|     std::default_random_engine e;
 | |
|     std::uniform_real_distribution<float>  u(0,1);
 | |
|     for (int i=0;i<N*n_embd;i++) {
 | |
|         data[i] = u(e);
 | |
|     }
 | |
| 
 | |
|     eval_string(mymodel, "user: what is the color of the flag of UN?");
 | |
|     eval_float(mymodel, data, N);
 | |
|     eval_string(mymodel, "assistant:");
 | |
|     eval_string(mymodel, mymodel->params.prompt.c_str());
 | |
|     const char* tmp;
 | |
|     for (int i=0; i<max_tgt_len; i++) {
 | |
|         tmp = sampling(mymodel);
 | |
|         if (strcmp(tmp, "</s>")==0) break;
 | |
|         printf("%s", tmp);
 | |
|         fflush(stdout);
 | |
|     }
 | |
|     printf("\n");
 | |
|     free_mymodel(mymodel);
 | |
|     return 0;
 | |
| }
 |