mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-14 11:07:10 +00:00
model : add grok-2 support (#15539)
* add grok-2 support
* type fix
* type fix
* type fix
* "fix" vocab for invalid sequences
* fix expert tensor mapping and spaces in vocab
* add chat template
* fix norm tensor mapping
* rename layer_out_norm to ffn_post_norm
* ensure ffn_post_norm is mapped
* fix experts merging
* remove erroneous FFN_GATE entry
* concatenate split tensors and add more metadata
* process all expert layers and try cat instead of hstack
* add support for community BPE vocab
* fix expert feed forward length and ffn_down concat
* commit this too
* add ffn_up/gate/down, unsure if sequence is right
* add ffn_gate/down/up to tensor names
* correct residual moe (still not working)
* mess--
* fix embedding scale being applied twice
* add built in chat template
* change beta fast for grok if default value
* remove spm vocab in favor of community bpe vocab
* change attention temp length metadata type to integer
* update attention temp length metadata
* remove comment
* replace M_SQRT2 with std::sqrt(2)
* add yarn metadata, move defaults to hparams
This commit is contained in:
@@ -143,6 +143,7 @@ enum llm_kv {
|
||||
LLM_KV_DECODER_START_TOKEN_ID,
|
||||
LLM_KV_DECODER_BLOCK_COUNT,
|
||||
LLM_KV_ATTN_LOGIT_SOFTCAPPING,
|
||||
LLM_KV_ROUTER_LOGIT_SOFTCAPPING,
|
||||
LLM_KV_FINAL_LOGIT_SOFTCAPPING,
|
||||
LLM_KV_SWIN_NORM,
|
||||
LLM_KV_RESCALE_EVERY_N_LAYERS,
|
||||
@@ -173,6 +174,8 @@ enum llm_kv {
|
||||
LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
|
||||
LLM_KV_ATTENTION_SLIDING_WINDOW,
|
||||
LLM_KV_ATTENTION_SCALE,
|
||||
LLM_KV_ATTENTION_OUTPUT_SCALE,
|
||||
LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
|
||||
LLM_KV_ATTENTION_KEY_LENGTH_MLA,
|
||||
LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
|
||||
|
||||
@@ -186,6 +189,10 @@ enum llm_kv {
|
||||
LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
|
||||
LLM_KV_ROPE_SCALING_FINETUNED,
|
||||
LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
|
||||
LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR,
|
||||
LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR,
|
||||
LLM_KV_ROPE_SCALING_YARN_BETA_FAST,
|
||||
LLM_KV_ROPE_SCALING_YARN_BETA_SLOW,
|
||||
|
||||
LLM_KV_SPLIT_NO,
|
||||
LLM_KV_SPLIT_COUNT,
|
||||
|
||||
Reference in New Issue
Block a user