mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-03 09:22:01 +00:00)

	Merge branch 'master' into compilade/refactor-kv-cache
@@ -1991,6 +1991,12 @@ int main(int argc, char ** argv) {
         params.n_batch = std::min(params.n_batch, n_kv);
     } else {
         params.n_batch = std::min(params.n_batch, params.n_ctx);
+        if (params.kl_divergence) {
+            params.n_parallel = 1;
+        } else {
+            // ensure there's at least enough seq_ids for HellaSwag
+            params.n_parallel = std::max(4, params.n_parallel);
+        }
     }
 
     if (params.ppl_stride > 0) {
@@ -2015,9 +2021,6 @@ int main(int argc, char ** argv) {
     llama_model * model;
     llama_context * ctx;
 
-    // ensure there's at least enough seq_ids for HellaSwag
-    params.n_parallel = std::max(4, params.n_parallel);
-
     // load the model and apply lora adapter, if any
     std::tie(model, ctx) = llama_init_from_gpt_params(params);
     if (model == NULL) {
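The net effect of the change: the n_parallel fixup that previously ran unconditionally just before model load now runs earlier, alongside the n_batch clamp, and is skipped for KL-divergence runs (which use a single sequence). Below is a minimal, self-contained sketch of the adjusted control flow. The field names (n_batch, n_ctx, n_parallel, kl_divergence) and the clamping logic come from the diff itself; gpt_params_sketch, adjust_params, and use_kv_limit are hypothetical stand-ins for illustration, not the real llama.cpp types or API.

#include <algorithm>
#include <cstdio>

// Hypothetical stand-in for the gpt_params fields touched by this diff;
// the real struct lives in llama.cpp's common code.
struct gpt_params_sketch {
    int  n_batch       = 2048;
    int  n_ctx         = 512;
    int  n_parallel    = 1;
    bool kl_divergence = false;
};

// Mirrors the post-change flow: n_parallel is settled together with the
// n_batch clamp, before the model/context are initialized, and KL-divergence
// runs keep a single sequence instead of being forced up to 4.
void adjust_params(gpt_params_sketch & params, int n_kv, bool use_kv_limit) {
    if (use_kv_limit) {
        params.n_batch = std::min(params.n_batch, n_kv);
    } else {
        params.n_batch = std::min(params.n_batch, params.n_ctx);
        if (params.kl_divergence) {
            params.n_parallel = 1;
        } else {
            // ensure there's at least enough seq_ids for HellaSwag
            params.n_parallel = std::max(4, params.n_parallel);
        }
    }
}

int main() {
    gpt_params_sketch params;
    adjust_params(params, /*n_kv=*/0, /*use_kv_limit=*/false);
    std::printf("n_batch = %d, n_parallel = %d\n", params.n_batch, params.n_parallel);
    // prints: n_batch = 512, n_parallel = 4
}

Doing the adjustment before initialization matters because n_parallel is an input to context setup, so its final value has to be known by the time the model and context are created rather than patched in afterwards.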