Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-10-30 08:42:00 +00:00.
			
		
		
		
	model : Nomic Embed Text V2 with Mixture-of-Experts (MoE) architecture (#12466)
* Nomic Embed Text V2 with Mixture-of-Experts (MoE) architecture. Adds a MoE-based embedding model supporting multilingual embeddings; selects the architecture variant based on hyperparameter detection (presence of MoE layers); removes unnecessary subclass initialization checks for clarity. See https://www.nomic.ai/blog/posts/nomic-embed-text-v2. Co-authored-by: Jared Van Bortel <jared@nomic.ai>. * Fix the tokenizer. * Don't rename this tensor. --------- Co-authored-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
		| @@ -19,6 +19,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = { | ||||
|     { LLM_ARCH_REFACT,           "refact"           }, | ||||
|     { LLM_ARCH_BERT,             "bert"             }, | ||||
|     { LLM_ARCH_NOMIC_BERT,       "nomic-bert"       }, | ||||
|     { LLM_ARCH_NOMIC_BERT_MOE,   "nomic-bert-moe"   }, | ||||
|     { LLM_ARCH_JINA_BERT_V2,     "jina-bert-v2"     }, | ||||
|     { LLM_ARCH_BLOOM,            "bloom"            }, | ||||
|     { LLM_ARCH_STABLELM,         "stablelm"         }, | ||||
| @@ -106,6 +107,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = { | ||||
|     { LLM_KV_EXPERT_WEIGHTS_SCALE,              "%s.expert_weights_scale"              }, | ||||
|     { LLM_KV_EXPERT_WEIGHTS_NORM,               "%s.expert_weights_norm"               }, | ||||
|     { LLM_KV_EXPERT_GATING_FUNC,                "%s.expert_gating_func"                }, | ||||
|     { LLM_KV_MOE_EVERY_N_LAYERS,                "%s.moe_every_n_layers"                }, | ||||
|     { LLM_KV_POOLING_TYPE,                      "%s.pooling_type"                      }, | ||||
|     { LLM_KV_LOGIT_SCALE,                       "%s.logit_scale"                       }, | ||||
|     { LLM_KV_DECODER_START_TOKEN_ID,            "%s.decoder_start_token_id"            }, | ||||
| @@ -472,6 +474,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N | ||||
|             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" }, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         LLM_ARCH_NOMIC_BERT_MOE, | ||||
|         { | ||||
|             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" }, | ||||
|             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" }, | ||||
|             { LLM_TENSOR_TOKEN_TYPES,     "token_types" }, | ||||
|             { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" }, | ||||
|             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" }, | ||||
|             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" }, | ||||
|             { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" }, | ||||
|             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" }, | ||||
|             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" }, | ||||
|             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" }, | ||||
|             { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" }, | ||||
|             { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" }, | ||||
|             { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" }, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         LLM_ARCH_JINA_BERT_V2, | ||||
|         { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 AT
					AT