Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
	kv-cache : use ggml_set_rows (#14285)
* kv-cache : use ggml_set_rows (ggml-ci)
* graph : separate k and v indices (ggml-ci)
* cont : remove redundant ifs (ggml-ci)
* kv-cache : improve find_slot impl
* kv-cache : bounds-check when accessing slot_info indices
* kv-cache : add comments (ggml-ci)
* ggml : add TODOs for adding GGML_OP_SET_ROWS support in the backends (ggml-ci)
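For context, `ggml_set_rows(ctx, a, b, c)` writes the rows of `b` into `a` at the row positions given by the index tensor `c`, which is what lets the KV cache scatter new K/V rows to arbitrary cells instead of copying into a single contiguous region at one head offset. Below is a minimal sketch, not code from this commit: the helper name and tensor names are hypothetical, and the exact operand contract (index tensor type, supported dst types) should be checked against `ggml.h`.

```cpp
// A minimal sketch, assuming ggml_set_rows(ctx, dst, src, idx) writes
// row src[i] into dst at row idx[i]. Names below are illustrative only.
#include "ggml.h"

// Hypothetical helper: scatter the current K rows of a ubatch into the
// cache at the (possibly non-contiguous) slots chosen by find_slot.
static struct ggml_tensor * k_cache_write(
        struct ggml_context * ctx,
        struct ggml_tensor  * k_cache,  // [n_embd_k, kv_size]  cache buffer
        struct ggml_tensor  * k_cur,    // [n_embd_k, n_tokens] new K rows
        struct ggml_tensor  * k_idx) {  // [n_tokens] destination slot ids
    // Before this change the write was a copy into a contiguous view
    // starting at a single head offset; set_rows removes that restriction.
    return ggml_set_rows(ctx, k_cache, k_cur, k_idx);
}
```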
```diff
@@ -195,11 +195,11 @@ llama_memory_hybrid_context::llama_memory_hybrid_context(
 
 llama_memory_hybrid_context::llama_memory_hybrid_context(
               llama_memory_hybrid * mem,
-            std::vector<uint32_t>   heads_attn,
+                  slot_info_vec_t   sinfos_attn,
         std::vector<llama_ubatch>   ubatches) :
     ubatches(std::move(ubatches)),
     // note: here we copy the ubatches. not sure if this is ideal
-    ctx_attn(new llama_kv_cache_unified_context(mem->get_mem_attn(), std::move(heads_attn), this->ubatches)),
+    ctx_attn(new llama_kv_cache_unified_context(mem->get_mem_attn(), std::move(sinfos_attn), this->ubatches)),
     ctx_recr(new llama_memory_recurrent_context(mem->get_mem_recr(),                        this->ubatches)),
     status(llama_memory_status_combine(ctx_attn->get_status(), ctx_recr->get_status())) {
 }
```
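The visible part of the change: the hybrid memory context no longer receives one head offset per ubatch (`std::vector<uint32_t> heads_attn`) but a vector of slot infos (`slot_info_vec_t sinfos_attn`), i.e. explicit per-token cell indices that the unified KV cache can feed to `ggml_set_rows`. A hypothetical sketch of the shape such a structure might take follows; the field and method names are assumptions for illustration, not the actual definition in the commit.

```cpp
#include <cstdint>
#include <vector>

// Hypothetical sketch: instead of a single contiguous head offset,
// each ubatch carries the exact cache cells its tokens will occupy.
struct slot_info {
    std::vector<uint32_t> idxs; // destination cell index for each token

    // Bounds-checked access, in the spirit of the commit message's
    // "bounds-check when accessing slot_info indices".
    uint32_t at(size_t i) const { return idxs.at(i); }
};

using slot_info_vec_t = std::vector<slot_info>; // one entry per ubatch
```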
Author: Georgi Gerganov