mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	* llama.cpp : split llama_context_params into model and context params ggml-ci * fix metal build * fix freq_base/scale default to model value * llama-bench : keep the same model between tests when possible * move n_threads to llama_context_params, add n_threads_batch * fix mpi build * remove kv_size(), cuda scratch fixes * remove low-vram option * add n_threads_batch to system info, refactor to get_system_info() * add documentation about --threads-batch to the READMEs * llama-bench fix * main : fix rope freq/scale warning * llama.cpp : add llama_get_model common : add llama_tokenize from model * remove duplicated ctx/model functions ggml-ci * cuda : print total VRAM used
		
			
				
	
	
		
			36 lines
		
	
	
		
			969 B
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			36 lines
		
	
	
		
			969 B
		
	
	
	
		
			C++
		
	
	
	
	
	
#include "embd-input.h"
 | 
						|
#include <stdlib.h>
 | 
						|
#include <random>
 | 
						|
#include <string.h>
 | 
						|
 | 
						|
int main(int argc, char** argv) {
 | 
						|
 | 
						|
    auto mymodel = create_mymodel(argc, argv);
 | 
						|
    int N = 10;
 | 
						|
    int max_tgt_len = 500;
 | 
						|
    int n_embd = llama_n_embd(llama_get_model(mymodel->ctx));
 | 
						|
 | 
						|
    // add random float embd to test evaluation
 | 
						|
    float * data = new float[N*n_embd];
 | 
						|
    std::default_random_engine e;
 | 
						|
    std::uniform_real_distribution<float>  u(0,1);
 | 
						|
    for (int i=0;i<N*n_embd;i++) {
 | 
						|
        data[i] = u(e);
 | 
						|
    }
 | 
						|
 | 
						|
    eval_string(mymodel, "user: what is the color of the flag of UN?");
 | 
						|
    eval_float(mymodel, data, N);
 | 
						|
    eval_string(mymodel, "assistant:");
 | 
						|
    eval_string(mymodel, mymodel->params.prompt.c_str());
 | 
						|
    const char* tmp;
 | 
						|
    for (int i=0; i<max_tgt_len; i++) {
 | 
						|
        tmp = sampling(mymodel);
 | 
						|
        if (strcmp(tmp, "</s>")==0) break;
 | 
						|
        printf("%s", tmp);
 | 
						|
        fflush(stdout);
 | 
						|
    }
 | 
						|
    printf("\n");
 | 
						|
    free_mymodel(mymodel);
 | 
						|
    return 0;
 | 
						|
}
 |