mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	llama : remember and restore kv cache data pointers (#1104)
because their values are stored in buf and get overwritten by the memcpy
This commit is contained in:
		@@ -2092,7 +2092,11 @@ void llama_set_kv_cache(
 | 
				
			|||||||
                         int   n_token_count) {
 | 
					                         int   n_token_count) {
 | 
				
			||||||
    // Make sure we have the same kv cache setup
 | 
					    // Make sure we have the same kv cache setup
 | 
				
			||||||
    LLAMA_ASSERT(ctx->model.kv_self.buf.size == n_size);
 | 
					    LLAMA_ASSERT(ctx->model.kv_self.buf.size == n_size);
 | 
				
			||||||
 | 
					    void * k_data = ctx->model.kv_self.k->data; // remember data pointers
 | 
				
			||||||
 | 
					    void * v_data = ctx->model.kv_self.v->data; // because their value is stored in buf and overwritten by memcpy
 | 
				
			||||||
    memcpy(ctx->model.kv_self.buf.addr, kv_cache, n_size);
 | 
					    memcpy(ctx->model.kv_self.buf.addr, kv_cache, n_size);
 | 
				
			||||||
 | 
					    ctx->model.kv_self.k->data = k_data; // restore correct data pointers
 | 
				
			||||||
 | 
					    ctx->model.kv_self.v->data = v_data;
 | 
				
			||||||
    ctx->model.kv_self.n = n_token_count;
 | 
					    ctx->model.kv_self.n = n_token_count;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user