Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
	model : add grok-2 support (#15539)
* add grok-2 support
* type fix
* type fix
* type fix
* "fix" vocab for invalid sequences
* fix expert tensor mapping and spaces in vocab
* add chat template
* fix norm tensor mapping
* rename layer_out_norm to ffn_post_norm
* ensure ffn_post_norm is mapped
* fix experts merging
* remove erroneous FFN_GATE entry
* concatenate split tensors and add more metadata
* process all expert layers and try cat instead of hstack
* add support for community BPE vocab
* fix expert feed forward length and ffn_down concat
* commit this too
* add ffn_up/gate/down, unsure if sequence is right
* add ffn_gate/down/up to tensor names
* correct residual moe (still not working)
* mess--
* fix embedding scale being applied twice
* add built in chat template
* change beta fast for grok if default value
* remove spm vocab in favor of community bpe vocab
* change attention temp length metadata type to integer
* update attention temp length metadata
* remove comment
* replace M_SQRT2 with std::sqrt(2)
* add yarn metadata, move defaults to hparams
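The last items ("change beta fast for grok if default value", "add yarn metadata, move defaults to hparams") describe reading YaRN RoPE-scaling values from GGUF metadata while keeping architecture-specific defaults in the hparams. Below is a minimal, self-contained sketch of that override pattern; the struct, helper, key string, and numeric values are illustrative assumptions, not the actual llama.cpp code.

    #include <cstdio>
    #include <optional>

    // Illustrative hparams holding YaRN defaults (values are made up).
    struct rope_yarn_hparams {
        float ext_factor  = -1.0f;  // -1: derive from context scaling
        float attn_factor =  1.0f;
        float beta_fast   = 32.0f;
        float beta_slow   =  1.0f;
    };

    // Hypothetical metadata lookup: yields a value only if the GGUF key exists.
    static std::optional<float> get_kv_f32(const char * /*key*/) {
        return std::nullopt; // pretend the key is absent in this sketch
    }

    int main() {
        rope_yarn_hparams hp;
        hp.beta_fast = 8.0f; // architecture-specific default (illustrative value)

        // Metadata, when present, overrides the default kept in hparams.
        if (auto v = get_kv_f32("grok.rope.scaling.yarn_beta_fast")) {
            hp.beta_fast = *v;
        }

        std::printf("yarn_beta_fast = %.1f\n", hp.beta_fast); // default kept here
        return 0;
    }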
@@ -139,6 +139,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_DECODER_START_TOKEN_ID,            "%s.decoder_start_token_id"            },
    { LLM_KV_DECODER_BLOCK_COUNT,               "%s.decoder_block_count"               },
    { LLM_KV_ATTN_LOGIT_SOFTCAPPING,            "%s.attn_logit_softcapping"            },
    { LLM_KV_ROUTER_LOGIT_SOFTCAPPING,          "%s.router_logit_softcapping"          },
    { LLM_KV_FINAL_LOGIT_SOFTCAPPING,           "%s.final_logit_softcapping"           },
    { LLM_KV_SWIN_NORM,                         "%s.swin_norm"                         },
    { LLM_KV_RESCALE_EVERY_N_LAYERS,            "%s.rescale_every_n_layers"            },
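This hunk adds a metadata key for router logit soft-capping next to the existing attention and final logit caps. Soft-capping, as used elsewhere in llama.cpp, squashes a logit into (-cap, cap) with a scaled tanh; the sketch below only illustrates the formula with a made-up cap value and is not the library's implementation.

    #include <cmath>
    #include <cstdio>

    // Soft-cap a logit into (-cap, cap): x -> cap * tanh(x / cap).
    static float softcap(float x, float cap) {
        return cap * std::tanh(x / cap);
    }

    int main() {
        const float cap = 30.0f;  // illustrative *.router_logit_softcapping value
        float router_logits[3] = {5.0f, 80.0f, -200.0f};
        for (float & x : router_logits) {
            x = softcap(x, cap);  // large magnitudes saturate near +/- cap
        }
        std::printf("%.2f %.2f %.2f\n", router_logits[0], router_logits[1], router_logits[2]);
        return 0;
    }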
@@ -169,19 +170,25 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,       "%s.attention.relative_buckets_count"       },
    { LLM_KV_ATTENTION_SLIDING_WINDOW,               "%s.attention.sliding_window"               },
    { LLM_KV_ATTENTION_SCALE,                        "%s.attention.scale"                        },
    { LLM_KV_ATTENTION_OUTPUT_SCALE,                 "%s.attention.output_scale"                 },
    { LLM_KV_ATTENTION_TEMPERATURE_LENGTH,           "%s.attention.temperature_length"           },
    { LLM_KV_ATTENTION_KEY_LENGTH_MLA,               "%s.attention.key_length_mla"               },
    { LLM_KV_ATTENTION_VALUE_LENGTH_MLA,             "%s.attention.value_length_mla"             },

    { LLM_KV_ROPE_DIMENSION_COUNT,      "%s.rope.dimension_count"                 },
    { LLM_KV_ROPE_DIMENSION_SECTIONS,   "%s.rope.dimension_sections"              },
    { LLM_KV_ROPE_FREQ_BASE,            "%s.rope.freq_base"                       },
    { LLM_KV_ROPE_SCALE_LINEAR,         "%s.rope.scale_linear"                    },
    { LLM_KV_ROPE_SCALING_TYPE,         "%s.rope.scaling.type"                    },
    { LLM_KV_ROPE_SCALING_FACTOR,       "%s.rope.scaling.factor"                  },
    { LLM_KV_ROPE_SCALING_ATTN_FACTOR,  "%s.rope.scaling.attn_factor"             },
    { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
    { LLM_KV_ROPE_SCALING_FINETUNED,    "%s.rope.scaling.finetuned"               },
    { LLM_KV_ROPE_SCALING_YARN_LOG_MUL, "%s.rope.scaling.yarn_log_multiplier"     },
    { LLM_KV_ROPE_DIMENSION_COUNT,          "%s.rope.dimension_count"                 },
    { LLM_KV_ROPE_DIMENSION_SECTIONS,       "%s.rope.dimension_sections"              },
    { LLM_KV_ROPE_FREQ_BASE,                "%s.rope.freq_base"                       },
    { LLM_KV_ROPE_SCALE_LINEAR,             "%s.rope.scale_linear"                    },
    { LLM_KV_ROPE_SCALING_TYPE,             "%s.rope.scaling.type"                    },
    { LLM_KV_ROPE_SCALING_FACTOR,           "%s.rope.scaling.factor"                  },
    { LLM_KV_ROPE_SCALING_ATTN_FACTOR,      "%s.rope.scaling.attn_factor"             },
    { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,     "%s.rope.scaling.original_context_length" },
    { LLM_KV_ROPE_SCALING_FINETUNED,        "%s.rope.scaling.finetuned"               },
    { LLM_KV_ROPE_SCALING_YARN_LOG_MUL,     "%s.rope.scaling.yarn_log_multiplier"     },
    { LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR,  "%s.rope.scaling.yarn_ext_factor"         },
    { LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR, "%s.rope.scaling.yarn_attn_factor"        },
    { LLM_KV_ROPE_SCALING_YARN_BETA_FAST,   "%s.rope.scaling.yarn_beta_fast"          },
    { LLM_KV_ROPE_SCALING_YARN_BETA_SLOW,   "%s.rope.scaling.yarn_beta_slow"          },

    { LLM_KV_SPLIT_NO,            "split.no"            },
    { LLM_KV_SPLIT_COUNT,         "split.count"         },
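Each entry pairs an enum key with a printf-style pattern whose %s placeholder is filled with the architecture name when the GGUF key is resolved. A standalone sketch of that expansion, with an assumed helper name and "grok" as the prefix:

    #include <cstdio>
    #include <string>

    // Expand a KV-name pattern such as "%s.rope.scaling.yarn_beta_fast"
    // with the model's architecture prefix.
    static std::string llm_kv_name(const char * pattern, const char * arch) {
        char buf[256];
        std::snprintf(buf, sizeof(buf), pattern, arch);
        return buf;
    }

    int main() {
        // With the additions above, the resolved metadata keys would look like:
        std::printf("%s\n", llm_kv_name("%s.rope.scaling.yarn_beta_fast", "grok").c_str());
        std::printf("%s\n", llm_kv_name("%s.attention.temperature_length", "grok").c_str());
        // -> grok.rope.scaling.yarn_beta_fast
        // -> grok.attention.temperature_length
        return 0;
    }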
@@ -398,12 +405,16 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
        },
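In the tensor-name map the %d placeholder is the per-layer block index, so the merged-experts entries resolve to names like blk.0.ffn_gate_exps, while the per-expert entries take a second index such as blk.0.ffn_gate.3. A small standalone sketch of both expansions:

    #include <cstdio>

    int main() {
        const int il = 0;  // layer (block) index
        const int xp = 3;  // expert index (used only by the per-expert pattern)
        char name[128];

        // Merged experts: one 3D tensor per layer holding all experts.
        std::snprintf(name, sizeof(name), "blk.%d.ffn_gate_exps", il);
        std::printf("%s\n", name);   // -> blk.0.ffn_gate_exps

        // Per-expert (split) layout: one 2D tensor per layer per expert.
        std::snprintf(name, sizeof(name), "blk.%d.ffn_gate.%d", il, xp);
        std::printf("%s\n", name);   // -> blk.0.ffn_gate.3
        return 0;
    }

The commit items about concatenating split tensors and processing all expert layers appear to refer to merging those per-expert matrices into the single per-layer _exps tensor during conversion.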
Sigbjørn Skjæret