Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
kv-cache : use ggml_set_rows (#14285)

* kv-cache : use ggml_set_rows

ggml-ci

* graph : separate k and v indices

ggml-ci

* cont : remove redundant ifs

ggml-ci

* kv-cache : improve find_slot impl

* kv-cache : bounds-check when accessing slot_info indices

* kv-cache : add comments

ggml-ci

* ggml : add TODOs for adding GGML_OP_SET_ROWS support in the backends

ggml-ci

This commit is contained in:
@@ -249,8 +249,14 @@ public:
 
     void set_input(const llama_ubatch * ubatch) override;
 
+    ggml_tensor * get_k_idxs() const { return self_k_idxs; }
+    ggml_tensor * get_v_idxs() const { return self_v_idxs; }
+
     ggml_tensor * get_kq_mask() const { return self_kq_mask_cnv; }
 
+    ggml_tensor * self_k_idxs = nullptr; // I64 [n_batch]
+    ggml_tensor * self_v_idxs = nullptr; // I64 [n_batch]
+
     ggml_tensor * self_kq_mask     = nullptr; // F32 [n_kv, n_batch]
     ggml_tensor * self_kq_mask_cnv = nullptr; //     [n_kv, n_batch]
 
@@ -274,9 +280,19 @@ public:
 
     void set_input(const llama_ubatch * ubatch) override;
 
+    ggml_tensor * get_k_idxs()     const { return self_k_idxs; }
+    ggml_tensor * get_v_idxs()     const { return self_v_idxs; }
+    ggml_tensor * get_k_idxs_swa() const { return self_k_idxs_swa; }
+    ggml_tensor * get_v_idxs_swa() const { return self_v_idxs_swa; }
+
     ggml_tensor * get_kq_mask()     const { return self_kq_mask_cnv; }
     ggml_tensor * get_kq_mask_swa() const { return self_kq_mask_swa_cnv; }
 
+    ggml_tensor * self_k_idxs     = nullptr; // I64 [n_batch]
+    ggml_tensor * self_v_idxs     = nullptr; // I64 [n_batch]
+    ggml_tensor * self_k_idxs_swa = nullptr; // I64 [n_batch]
+    ggml_tensor * self_v_idxs_swa = nullptr; // I64 [n_batch]
+
     ggml_tensor * self_kq_mask         = nullptr; // F32 [n_kv, n_batch]
     ggml_tensor * self_kq_mask_cnv     = nullptr; //     [n_kv, n_batch]
     ggml_tensor * self_kq_mask_swa     = nullptr; // F32 [n_kv, n_batch]
@@ -319,8 +335,14 @@ public:
 
     ggml_tensor * s_copy; // I32 [kv_size]
 
+    ggml_tensor * get_k_idxs() const { return self_k_idxs; }
+    ggml_tensor * get_v_idxs() const { return self_v_idxs; }
+
     ggml_tensor * get_kq_mask() const { return self_kq_mask_cnv; }
 
+    ggml_tensor * self_k_idxs = nullptr; // I64 [n_batch]
+    ggml_tensor * self_v_idxs = nullptr; // I64 [n_batch]
+
     ggml_tensor * self_kq_mask     = nullptr; // F32 [n_kv, n_batch]
     ggml_tensor * self_kq_mask_cnv = nullptr; //     [n_kv, n_batch]
 
@@ -336,7 +358,7 @@ public:
     llm_graph_input_one() {}
     virtual ~llm_graph_input_one() = default;
 
-    void set_input(const llama_ubatch *) override;
+    void set_input(const llama_ubatch * ubatch) override;
 
     ggml_tensor * one = nullptr; // F32
 };
Reference in New Issue · Block a user

Author: Georgi Gerganov