	convert.py : 70b model working (change attn_q permute)
Fixes conversion of 70B (grouped-query attention) models by permuting attn_q with the full query-head count: permute(..., n_head, n_head) instead of permute(..., n_head, n_head_kv).
@@ -326,6 +326,7 @@ Vocab = Union[BpeVocab, SentencePieceVocab]
 
 
 def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
+    #print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
     if n_head_kv is not None and n_head != n_head_kv:
         n_head //= n_head_kv
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
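
For context, a minimal runnable sketch of what permute does. The hunk cuts the return expression off mid-chain; the .swapaxes(1, 2).reshape(weights.shape) continuation below is an assumption about the surrounding convert.py code, and the shapes are illustrative, not real model dimensions. Mechanically, the permutation pairs each row j of a head with row j + head_dim/2, which matches the usual reordering between the half-split rotary layout of HF checkpoints and the interleaved layout ggml's RoPE operates on.

import numpy as np
from numpy.typing import NDArray

def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
    # For GQA models the K tensor has fewer rows than Q, so n_head is
    # reduced here to make the reshape below line up with the row count.
    if n_head_kv is not None and n_head != n_head_kv:
        n_head //= n_head_kv
    # Split each head's rows into (2, head_dim/2), swap the two axes, and
    # flatten back; the overall shape is unchanged, only rows move.
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                   .swapaxes(1, 2)
                   .reshape(weights.shape))

# Tiny sanity check: 2 heads, head_dim = 4, n_embd = 8.
w = np.arange(8 * 8).reshape(8, 8)
out = permute(w, 2, 2)
assert out.shape == w.shape
assert (out[:4] == w[[0, 2, 1, 3]]).all()  # head 0: row j pairs with row j + 2
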
@@ -818,12 +819,12 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" in model:
             print(f"Permuting layer {i}")
-            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
            #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] =              model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
             print(f"Unpacking and permuting layer {i}")
-            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
             tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy        (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
         else:
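
The two replaced lines are the actual fix. In a grouped-query-attention model such as LLaMA-2 70B (n_head = 64, n_head_kv = 8), q_proj still has the full n_head heads; only k_proj is shrunk to n_head_kv heads. Q must therefore be permuted per query head with (n_head, n_head), not per KV group with (n_head, n_head_kv). Continuing the sketch above with scaled-down, hypothetical dimensions (4 query heads sharing 2 KV heads), the old call scrambles rows across query heads:

# Hypothetical GQA shapes: n_head = 4, n_head_kv = 2, head_dim = 4, n_embd = 16.
n_head, n_head_kv, head_dim, n_embd = 4, 2, 4, 16
q = np.arange(n_head * head_dim * n_embd).reshape(n_head * head_dim, n_embd)

fixed = permute(q, n_head, n_head)      # new call: reshape(4, 2, 2, 16), per Q head
broken = permute(q, n_head, n_head_kv)  # old call: reshape(2, 2, 4, 16), per KV group

# The fixed call keeps every row inside its own query head ...
assert (fixed[:head_dim] == q[[0, 2, 1, 3]]).all()
# ... while the old call interleaves head 0's rows with head 1's.
assert (broken[:head_dim] == q[[0, 4, 1, 5]]).all()

k_proj keeps the old (n_head, n_head_kv) arguments: there the division inside permute (64 // 8 = 8) matches K's actual head count for the 70B configuration.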