	model: EmbeddingGemma Adding Support for SentenceTransformers Dense Modules (#16367)
* model: EmbeddingGemma sentence-transformers dense linear projections support

* model: add support for EmbeddingGemma SentenceTransformers dense linear projections

  Adding support for the Dense modules used in EmbeddingGemma models. EmbeddingGemma is a SentenceTransformers model with additional modules beyond the base Transformer backbone.

  See: https://developers.googleblog.com/en/gemma-explained-embeddinggemma-architecture-and-recipe/

* model: add support for EmbeddingGemma SentenceTransformers dense linear projections

  - converting model with dense-layers is optional
  - introduced dense config params

* Update convert_hf_to_gguf.py

  Co-authored-by: Daniel Bevenius <daniel.bevenius@gmail.com>

* fixed formatting issues

* Update src/llama-graph.cpp

  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* removed pooling_type_opt, always allow overriding pooling_type; asserts checking dense features dims

* fix python lint

* fix ubuntu gcc build warning

* fixed thread-safety test; moved asserts to load_hparams

* tidying up code; simplifying graph-context, expecting both dense weights

* minor: add TODO

---------

Co-authored-by: Daniel Bevenius <daniel.bevenius@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
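For context, the extra modules are visible when loading the model through sentence-transformers itself. A minimal sketch, assuming the `google/embeddinggemma-300m` checkpoint id and the module layout described in the architecture post linked above:

```python
from sentence_transformers import SentenceTransformer

# Assumed checkpoint id; adjust to the actual EmbeddingGemma release.
model = SentenceTransformer("google/embeddinggemma-300m")

# A SentenceTransformer is a module pipeline; EmbeddingGemma carries two
# Dense projection modules after the Transformer backbone and pooling.
for name, module in model.named_children():
    print(name, type(module).__name__)
# expected (per the architecture post): Transformer, Pooling, Dense, Dense, Normalize
```

These two Dense modules are what the GGUF conversion must now capture, since they sit outside the Transformer weights that llama.cpp previously converted.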
The diff below registers the new metadata keys, tensor names, and tensor-info entries in llama.cpp's architecture tables:
```diff
@@ -219,6 +219,11 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
 
     { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
+    // sentence-transformers dense modules feature dims
+    { LLM_KV_DENSE_2_FEAT_IN,        "%s.dense_2_feat_in"  },
+    { LLM_KV_DENSE_2_FEAT_OUT,       "%s.dense_2_feat_out" },
+    { LLM_KV_DENSE_3_FEAT_IN,        "%s.dense_3_feat_in"  },
+    { LLM_KV_DENSE_3_FEAT_OUT,       "%s.dense_3_feat_out" },
 
     { LLM_KV_TOKENIZER_MODEL,                "tokenizer.ggml.model"                    },
     { LLM_KV_TOKENIZER_PRE,                  "tokenizer.ggml.pre"                      },
```
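The `%s` in each KV name is substituted with the architecture prefix, so a converted model carries keys like `<arch>.dense_2_feat_in`. A minimal sketch of how the converter could emit these fields with the gguf Python package — the `gemma-embedding` prefix and the 768/3072 dimensions are illustrative assumptions, not values confirmed by this diff:

```python
from gguf import GGUFWriter

# Assumed arch prefix and feature dims; the real conversion reads these
# from the SentenceTransformers Dense module configs of the checkpoint.
writer = GGUFWriter("embeddinggemma.gguf", "gemma-embedding")
writer.add_uint32("gemma-embedding.dense_2_feat_in",  768)
writer.add_uint32("gemma-embedding.dense_2_feat_out", 3072)
writer.add_uint32("gemma-embedding.dense_3_feat_in",  3072)
writer.add_uint32("gemma-embedding.dense_3_feat_out", 768)
# (tensor data and the final write calls omitted for brevity)
```

Storing both the input and output feature dimension per dense layer lets the loader validate the projection weights against the model's embedding size at load time, which is what the asserts moved into load_hparams check.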
```diff
@@ -1071,6 +1076,8 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
             { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_DENSE_2_OUT,     "dense_2" },
+            { LLM_TENSOR_DENSE_3_OUT,     "dense_3" },
             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
```
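These base names become tensor names in the converted file (e.g. `dense_2.weight`). A quick way to inspect a converted model, assuming the gguf Python package and a hypothetical file name:

```python
from gguf import GGUFReader

reader = GGUFReader("embeddinggemma.gguf")  # hypothetical file name
for tensor in reader.tensors:
    if tensor.name.startswith("dense_"):
        # expect dense_2.weight and dense_3.weight for a model converted
        # with the dense layers included
        print(tensor.name, tensor.shape)
```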
```diff
@@ -2281,6 +2288,8 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_OUTPUT,                     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CLS,                        {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CLS_OUT,                    {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_DENSE_2_OUT,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
+    {LLM_TENSOR_DENSE_3_OUT,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
     {LLM_TENSOR_OUTPUT_NORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
     {LLM_TENSOR_DEC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
     {LLM_TENSOR_ENC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
```
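Both tensors are registered as non-repeating output-layer weights applied with `GGML_OP_MUL_MAT`, i.e. plain matrix multiplications after pooling. Conceptually the embedding path becomes the following — a numpy sketch, assuming mean pooling, identity activations, a final L2 normalization as is typical for sentence embeddings, and the 768 -> 3072 -> 768 shapes used above:

```python
import numpy as np

def embed(token_states, dense_2_w, dense_3_w):
    """token_states: [n_tokens, 768]; dense_2_w: [3072, 768]; dense_3_w: [768, 3072]."""
    pooled = token_states.mean(axis=0)  # pooling over the sequence
    h = dense_2_w @ pooled              # mul_mat with the dense_2 weight
    out = dense_3_w @ h                 # mul_mat with the dense_3 weight
    return out / np.linalg.norm(out)    # L2-normalized sentence embedding
```

Because the graph context expects both dense weights together (per the commit message), a model converted without the optional dense layers simply skips this stage and returns the pooled embedding directly.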
Author: Saba Fallah