llama.cpp (mirror of https://github.com/ggml-org/llama.cpp.git)
	convert.py : 70b model working (change attn_q permute)
convert.py
@@ -326,6 +326,7 @@ Vocab = Union[BpeVocab, SentencePieceVocab]
 #
 
 def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
+    #print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
     if n_head_kv is not None and n_head != n_head_kv:
         n_head //= n_head_kv
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
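
The hunk shows only the first line of permute's return expression. As a rough standalone sketch, assuming the continuation in convert.py is the usual .swapaxes(1, 2).reshape(weights.shape): the function splits dimension 0 into (heads, 2, head_dim/2) blocks, swaps the middle axes, and flattens back, converting the Hugging Face rotary Q/K weight layout into the interleaved layout ggml expects. (Note the commented-out debug line added above references n_kv_head while the parameter is named n_head_kv; it is dead code either way.) The numpy demo below is illustrative, not a copy of the file:

    import numpy as np

    def permute(weights, n_head, n_head_kv):
        # Grouped-query attention: if Q has more heads than K/V, permute in
        # groups of n_head // n_head_kv instead of per head.
        if n_head_kv is not None and n_head != n_head_kv:
            n_head //= n_head_kv
        # Split dim 0 into (n_head, 2, rows_per_head / 2), swap the "2" axis
        # with the half-block axis, then flatten back to the original shape.
        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                       .swapaxes(1, 2)
                       .reshape(weights.shape))

    # Toy weight: 4 heads x 4 rows each, one column so rows are easy to read.
    w = np.arange(16.0).reshape(16, 1)
    print(permute(w, n_head=4, n_head_kv=4).ravel())
    # Each 4-row head block [a, b, c, d] comes out as [a, c, b, d]:
    # [ 0. 2. 1. 3. 4. 6. 5. 7. 8. 10. 9. 11. 12. 14. 13. 15.]
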
@@ -818,12 +819,12 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" in model:
             print(f"Permuting layer {i}")
-            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
            #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] =              model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
             print(f"Unpacking and permuting layer {i}")
-            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
             tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy        (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
         else:
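
Why the change: LLaMA-2 70B uses grouped-query attention (per its published config, not this diff: n_head = 64, n_head_kv = 8, n_embd = 8192), so q_proj still carries 64 full heads while k_proj carries only 8. The old call permute(q, n_head, n_head_kv) made permute divide Q's head count down to 8 and swap 512-row half-blocks that cross Q's 128-row head boundaries; passing (n_head, n_head) skips the division so each Q head is permuted independently. k_proj keeps (n_head, n_head_kv), which resolves to its true 8 heads. A back-of-the-envelope check, with the 70B config values assumed as above:

    # Rough shape check for LLaMA-2 70B (config values assumed, not in this diff).
    n_embd, n_head, n_head_kv = 8192, 64, 8
    head_dim = n_embd // n_head            # 128 rows per attention head

    q_rows = n_head * head_dim             # 8192: q_proj keeps 64 full heads
    k_rows = n_head_kv * head_dim          # 1024: k_proj has only 8 heads

    # Old call permute(q, n_head, n_head_kv): head count collapses to
    # 64 // 8 = 8, so permute swaps 8192 // 8 // 2 = 512-row half-blocks,
    # each spanning four real 128-row heads -- rows cross head boundaries.
    assert q_rows // (n_head // n_head_kv) // 2 == 512

    # New call permute(q, n_head, n_head): no division, so permute swaps
    # 8192 // 64 // 2 = 64-row half-blocks, staying inside each head.
    assert q_rows // n_head // 2 == head_dim // 2 == 64

    # k_proj is unchanged: permute(k, n_head, n_head_kv) collapses to its
    # real 8 heads, giving 1024 // 8 // 2 = 64-row half-blocks -- correct.
    assert k_rows // n_head_kv // 2 == 64

For models without grouped-query attention (n_head == n_head_kv), both the old and new argument pairs behave identically, so 7B/13B conversions are unaffected.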