	llama : do not allocate KV cache for "vocab_only == true" (#682)
Fixes sanitizer CI
@@ -1608,7 +1608,7 @@ struct llama_context * llama_init_from_file(
     }
 
     // reserve memory for context buffers
-    {
+    if (!params.vocab_only) {
         if (!kv_cache_init(ctx->model.hparams, ctx->model.kv_self, memory_type, ctx->model.hparams.n_ctx)) {
             fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
             llama_free(ctx);
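For context, a minimal sketch (not part of this commit) of how a caller exercises the vocab-only path, assuming the llama.h C API contemporary with this change (llama_context_default_params, llama_init_from_file, llama_tokenize, llama_n_vocab, llama_free); the model path is a placeholder. With vocab_only set, only the vocabulary is loaded and, after this fix, no self-attention KV cache is allocated for the context:

// sketch: load only the vocabulary and tokenize a string (no weights, no KV cache)
#include <cstdio>
#include <vector>

#include "llama.h"

int main() {
    llama_context_params params = llama_context_default_params();
    params.vocab_only = true; // with this fix, kv_cache_init() is skipped for such a context

    llama_context * ctx = llama_init_from_file("models/7B/ggml-model-q4_0.bin", params);
    if (ctx == nullptr) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // tokenization needs only the vocabulary
    std::vector<llama_token> tokens(64);
    const int n_tokens = llama_tokenize(ctx, "Hello world", tokens.data(), (int) tokens.size(), /*add_bos=*/true);

    printf("n_vocab = %d, n_tokens = %d\n", llama_n_vocab(ctx), n_tokens);

    llama_free(ctx);
    return 0;
}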