	llama : update llama_kv_self API
ggml-ci
@@ -82,10 +82,8 @@ static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & toke
 }
 
 static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd) {
-    llama_kv_cache * kv = llama_get_kv_cache(ctx);
-
     // clear previous kv_cache values (irrelevant for embeddings)
-    llama_kv_cache_clear(kv);
+    llama_kv_self_clear(ctx);
 
     // run model
     LOG_INF("%s: n_tokens = %d, n_seq = %d\n", __func__, batch.n_tokens, n_seq);
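For reference, a minimal sketch of the call-site migration this hunk performs, assuming the post-commit public API from llama.h; the helper name clear_kv_for_embeddings is hypothetical and only illustrates the pattern:

#include "llama.h"

// Hypothetical helper showing the migrated call pattern: the
// llama_kv_self_* functions operate on the llama_context directly,
// so the intermediate llama_kv_cache handle is no longer fetched.
static void clear_kv_for_embeddings(llama_context * ctx) {
    // old (pre-commit):
    //     llama_kv_cache * kv = llama_get_kv_cache(ctx);
    //     llama_kv_cache_clear(kv);
    // new (this commit):
    llama_kv_self_clear(ctx);
}

The change removes two lines per call site (the handle declaration and the blank line after it), which matches the hunk header shrinking from 10 lines to 8.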