Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-31 08:51:55 +00:00
	py : add Gemma conversion from HF models (#5647)
* py : add gemma conversion from HF models

* Update convert-hf-to-gguf.py

Co-authored-by: Aarni Koskela <akx@iki.fi>

* Update convert-hf-to-gguf.py

Co-authored-by: Aarni Koskela <akx@iki.fi>

* Update convert-hf-to-gguf.py

Co-authored-by: Jared Van Bortel <jared@nomic.ai>

---------

Co-authored-by: Aarni Koskela <akx@iki.fi>
Co-authored-by: Jared Van Bortel <jared@nomic.ai>
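The conversion itself lives in convert-hf-to-gguf.py, which reads the Hugging Face checkpoint's config.json and maps its hyperparameters onto GGUF metadata before writing the tensors. The following is only a rough, standalone sketch of that metadata mapping, not the code added by this commit: the file paths are placeholders, and the gguf-py calls and config keys shown are assumptions based on the usual Gemma config layout.

```python
# Hypothetical sketch: map Gemma hyperparameters from an HF config.json
# onto GGUF metadata with the gguf-py package. Tensor conversion is omitted.
import json
import gguf

with open("config.json") as f:          # HF checkpoint config (placeholder path)
    hp = json.load(f)

writer = gguf.GGUFWriter("gemma-meta.gguf", "gemma")
writer.add_context_length(hp["max_position_embeddings"])
writer.add_embedding_length(hp["hidden_size"])
writer.add_block_count(hp["num_hidden_layers"])
writer.add_feed_forward_length(hp["intermediate_size"])
writer.add_head_count(hp["num_attention_heads"])
writer.add_head_count_kv(hp["num_key_value_heads"])
# Gemma configs carry an explicit head_dim; fall back to hidden_size / heads otherwise
head_dim = hp.get("head_dim", hp["hidden_size"] // hp["num_attention_heads"])
writer.add_key_length(head_dim)
writer.add_value_length(head_dim)
writer.add_layer_norm_rms_eps(hp["rms_norm_eps"])

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
```

In practice the converter is run on the checkpoint directory and also remaps the HF tensor names and writes the tensor data, which the sketch above leaves out. The llama.cpp hunks below touch the Gemma graph build, where those converted weights are consumed.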
```diff
@@ -7450,6 +7450,7 @@ struct llm_build_context {
 
         inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb);
         cb(inpL, "inp_embd", -1);
+
         inpL = ggml_scale(ctx0, inpL, sqrtf(n_embd));
         cb(inpL, "inp_scaled", -1);
 
@@ -7491,6 +7492,7 @@ struct llm_build_context {
                         n_embd_head_k, 2, 0, n_orig_ctx, freq_base, freq_scale,
                         ext_factor, attn_factor, beta_fast, beta_slow);
                 cb(Qcur, "Qcur", il);
+
                 Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k)));
                 cb(Qcur, "Qcur_scaled", il);
 
@@ -7505,6 +7507,7 @@ struct llm_build_context {
                         Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f, cb, il);
                 cb(cur, "kqv_out", il);
             }
+
             struct ggml_tensor * sa_out = ggml_add(ctx0, cur, inpL);
             cb(sa_out, "sa_out", il);
```
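These hunks sit in the Gemma graph build, where the token embeddings are scaled by sqrt(n_embd) and the query is pre-scaled by 1 / sqrt(n_embd_head_k), so the attention call can pass a scale of 1.0f. A small numeric illustration of those two factors, using assumed example dimensions (n_embd = 3072 and n_embd_head_k = 256 are placeholders for illustration, not values taken from the diff):

```python
# Illustrative sketch of the two Gemma-specific scalings visible above,
# with hypothetical example dimensions.
import math

n_embd = 3072          # assumed embedding width, illustration only
n_embd_head_k = 256    # assumed per-head key dimension, illustration only

emb_scale = math.sqrt(n_embd)               # applied to the embeddings ("inp_scaled")
q_scale   = 1.0 / math.sqrt(n_embd_head_k)  # applied to Q ("Qcur_scaled")

print(f"embedding scale: {emb_scale:.4f}")  # ~55.4256
print(f"query scale:     {q_scale:.6f}")    # 0.062500
```

Folding the 1 / sqrt(d_k) factor into Q up front is equivalent to applying it inside the attention computation, which is why the kqv call in the last hunk passes 1.0f as its scale.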
Author: Georgi Gerganov