	llama : do not allocate KV cache for "vocab_only == true" (#682)
Fixes the sanitizer CI failure.
@@ -1608,7 +1608,7 @@ struct llama_context * llama_init_from_file(
     }
 
     // reserve memory for context buffers
-    {
+    if (!params.vocab_only) {
         if (!kv_cache_init(ctx->model.hparams, ctx->model.kv_self, memory_type, ctx->model.hparams.n_ctx)) {
             fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
             llama_free(ctx);
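For context, setting vocab_only = true in llama_context_params loads only the model's vocabulary (e.g. for tokenization), skipping the weights; before this change, llama_init_from_file still reserved the full self-attention KV cache in that mode. Below is a minimal sketch of such a caller, assuming the C API as it stood around this commit (llama_init_from_file has since been superseded by the split model/context API):

// Sketch: load the vocabulary only; with this fix, no KV cache is allocated.
#include <stdio.h>
#include "llama.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model-path>\n", argv[0]);
        return 1;
    }

    struct llama_context_params params = llama_context_default_params();
    params.vocab_only = true; // vocabulary only: no weights, and (after this commit) no KV cache

    struct llama_context * ctx = llama_init_from_file(argv[1], params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load model from '%s'\n", argv[1]);
        return 1;
    }

    printf("n_vocab = %d\n", llama_n_vocab(ctx));

    llama_free(ctx);
    return 0;
}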
Stephan Walter