mirror of https://github.com/ggml-org/llama.cpp.git
synced 2025-11-03 09:22:01 +00:00

Merge branch 'master' into compilade/imatrix-batched-chunks
gguf-py/README.md
@@ -15,13 +15,15 @@ pip install gguf

[examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file. Note that this file cannot be used as a model.

-[scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console.
+[examples/reader.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/reader.py) — Extracts and displays key-value pairs and tensor details from a GGUF file in a readable format.
+
+[gguf/scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console.

-[scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key.
+[gguf/scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key.

-[scripts/gguf_convert_endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_convert_endian.py) — Allows converting the endianness of GGUF files.
+[gguf/scripts/gguf_convert_endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_convert_endian.py) — Allows converting the endianness of GGUF files.

-[scripts/gguf_new_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_new_metadata.py) — Copies a GGUF file with added/modified/removed metadata values.
+[gguf/scripts/gguf_new_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_new_metadata.py) — Copies a GGUF file with added/modified/removed metadata values.
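For orientation, here is a minimal sketch (not part of this diff) of the round trip the two examples demonstrate, using only `GGUFWriter`/`GGUFReader` calls defined by this package; the file name and values are toy placeholders:

```python
import numpy as np
from gguf import GGUFReader, GGUFWriter

# Write a toy GGUF file, roughly what examples/writer.py demonstrates.
writer = GGUFWriter("example.gguf", "llama")   # arch string only; not a usable model
writer.add_block_count(12)                     # expands to the "llama.block_count" key
writer.add_uint32("answer", 42)                # arbitrary custom key-value pair
writer.add_tensor("tensor1", np.ones(32, dtype=np.float32))
writer.write_header_to_file()                  # header, then KV data, then tensor data
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()

# Read it back, roughly what examples/reader.py demonstrates.
reader = GGUFReader("example.gguf")
for name, field in reader.fields.items():      # key-value pairs
    print(name, field.types)
for tensor in reader.tensors:                  # tensor details
    print(tensor.name, tensor.shape, tensor.tensor_type)
```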
## Development
Maintainers who participate in development of this package are advised to install it in editable mode:

```sh
pip install --editable .
```
gguf-py/gguf/constants.py
@@ -64,20 +64,33 @@ class Keys:
        BASE_MODEL_AUTHOR          = "general.base_model.{id}.author"
        BASE_MODEL_VERSION         = "general.base_model.{id}.version"
        BASE_MODEL_ORGANIZATION    = "general.base_model.{id}.organization"
        BASE_MODEL_DESCRIPTION     = "general.base_model.{id}.description"
        BASE_MODEL_URL             = "general.base_model.{id}.url" # Model Website/Paper
        BASE_MODEL_DOI             = "general.base_model.{id}.doi"
        BASE_MODEL_UUID            = "general.base_model.{id}.uuid"
        BASE_MODEL_REPO_URL        = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)

        # Dataset Source
        DATASET_COUNT           = "general.dataset.count"
        DATASET_NAME            = "general.dataset.{id}.name"
        DATASET_AUTHOR          = "general.dataset.{id}.author"
        DATASET_VERSION         = "general.dataset.{id}.version"
        DATASET_ORGANIZATION    = "general.dataset.{id}.organization"
        DATASET_DESCRIPTION     = "general.dataset.{id}.description"
        DATASET_URL             = "general.dataset.{id}.url" # Model Website/Paper
        DATASET_DOI             = "general.dataset.{id}.doi"
        DATASET_UUID            = "general.dataset.{id}.uuid"
        DATASET_REPO_URL        = "general.dataset.{id}.repo_url" # Model Source Repository (git/svn/etc...)

        # Array based KV stores
        TAGS                       = "general.tags"
        LANGUAGES                  = "general.languages"
        DATASETS                   = "general.datasets"

    class LLM:
        VOCAB_SIZE                        = "{arch}.vocab_size"
        CONTEXT_LENGTH                    = "{arch}.context_length"
        EMBEDDING_LENGTH                  = "{arch}.embedding_length"
        FEATURES_LENGTH                   = "{arch}.features_length"
        BLOCK_COUNT                       = "{arch}.block_count"
        LEADING_DENSE_BLOCK_COUNT         = "{arch}.leading_dense_block_count"
        FEED_FORWARD_LENGTH               = "{arch}.feed_forward_length"

@@ -89,14 +102,20 @@ class Keys:
        EXPERT_USED_COUNT                 = "{arch}.expert_used_count"
        EXPERT_SHARED_COUNT               = "{arch}.expert_shared_count"
        EXPERT_WEIGHTS_SCALE              = "{arch}.expert_weights_scale"
        EXPERT_WEIGHTS_NORM               = "{arch}.expert_weights_norm"
        EXPERT_GATING_FUNC                = "{arch}.expert_gating_func"
        POOLING_TYPE                      = "{arch}.pooling_type"
        LOGIT_SCALE                       = "{arch}.logit_scale"
        DECODER_START_TOKEN_ID            = "{arch}.decoder_start_token_id"
        ATTN_LOGIT_SOFTCAPPING            = "{arch}.attn_logit_softcapping"
        FINAL_LOGIT_SOFTCAPPING           = "{arch}.final_logit_softcapping"
        SWIN_NORM                         = "{arch}.swin_norm"
        RESCALE_EVERY_N_LAYERS            = "{arch}.rescale_every_n_layers"
        TIME_MIX_EXTRA_DIM                = "{arch}.time_mix_extra_dim"
        TIME_DECAY_EXTRA_DIM              = "{arch}.time_decay_extra_dim"
        RESIDUAL_SCALE                    = "{arch}.residual_scale"
        EMBEDDING_SCALE                   = "{arch}.embedding_scale"
        TOKEN_SHIFT_COUNT                 = "{arch}.token_shift_count"

    class Attention:
        HEAD_COUNT        = "{arch}.attention.head_count"

@@ -107,14 +126,18 @@ class Keys:
        VALUE_LENGTH      = "{arch}.attention.value_length"
        LAYERNORM_EPS     = "{arch}.attention.layer_norm_epsilon"
        LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
        GROUPNORM_EPS     = "{arch}.attention.group_norm_epsilon"
        GROUPNORM_GROUPS  = "{arch}.attention.group_norm_groups"
        CAUSAL            = "{arch}.attention.causal"
        Q_LORA_RANK       = "{arch}.attention.q_lora_rank"
        KV_LORA_RANK      = "{arch}.attention.kv_lora_rank"
        REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
        SLIDING_WINDOW    = "{arch}.attention.sliding_window"
        SCALE             = "{arch}.attention.scale"

    class Rope:
        DIMENSION_COUNT         = "{arch}.rope.dimension_count"
        DIMENSION_SECTIONS      = "{arch}.rope.dimension_sections"
        FREQ_BASE               = "{arch}.rope.freq_base"
        SCALING_TYPE            = "{arch}.rope.scaling.type"
        SCALING_FACTOR          = "{arch}.rope.scaling.factor"

@@ -138,6 +161,14 @@ class Keys:
    class WKV:
        HEAD_SIZE = "{arch}.wkv.head_size"

    class PosNet:
        EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
        BLOCK_COUNT      = "{arch}.posnet.block_count"

    class ConvNext:
        EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
        BLOCK_COUNT      = "{arch}.convnext.block_count"

    class Tokenizer:
        MODEL                = "tokenizer.ggml.model"
        PRE                  = "tokenizer.ggml.pre"

@@ -148,10 +179,11 @@ class Keys:
        MERGES               = "tokenizer.ggml.merges"
        BOS_ID               = "tokenizer.ggml.bos_token_id"
        EOS_ID               = "tokenizer.ggml.eos_token_id"
+        EOT_ID               = "tokenizer.ggml.eot_token_id"
+        EOM_ID               = "tokenizer.ggml.eom_token_id"
        UNK_ID               = "tokenizer.ggml.unknown_token_id"
        SEP_ID               = "tokenizer.ggml.seperator_token_id"
        PAD_ID               = "tokenizer.ggml.padding_token_id"
        CLS_ID               = "tokenizer.ggml.cls_token_id"
        MASK_ID              = "tokenizer.ggml.mask_token_id"
        ADD_BOS              = "tokenizer.ggml.add_bos_token"
        ADD_EOS              = "tokenizer.ggml.add_eos_token"

@@ -164,11 +196,16 @@ class Keys:
        CHAT_TEMPLATE_N      = "tokenizer.chat_template.{name}"
        CHAT_TEMPLATES       = "tokenizer.chat_templates"
        # FIM/Infill special tokens constants
        FIM_PRE_ID           = "tokenizer.ggml.fim_pre_token_id"
        FIM_SUF_ID           = "tokenizer.ggml.fim_suf_token_id"
        FIM_MID_ID           = "tokenizer.ggml.fim_mid_token_id"
        FIM_PAD_ID           = "tokenizer.ggml.fim_pad_token_id"
        FIM_REP_ID           = "tokenizer.ggml.fim_rep_token_id"
        FIM_SEP_ID           = "tokenizer.ggml.fim_sep_token_id"
        # deprecated:
        PREFIX_ID            = "tokenizer.ggml.prefix_token_id"
        SUFFIX_ID            = "tokenizer.ggml.suffix_token_id"
        MIDDLE_ID            = "tokenizer.ggml.middle_token_id"
-        EOT_ID               = "tokenizer.ggml.eot_token_id"
-        EOM_ID               = "tokenizer.ggml.eom_token_id"

    class Adapter:
        TYPE       = "adapter.type"
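These `Keys` entries are templates rather than final key names: `{arch}` and `{id}` are expanded before anything is serialized. A small illustration (assuming the package is importable as `gguf`):

```python
from gguf.constants import Keys

# Architecture-scoped keys expand per model architecture...
print(Keys.LLM.CONTEXT_LENGTH.format(arch="llama"))  # -> "llama.context_length"
# ...and the per-source dataset keys added above expand per source id.
print(Keys.General.DATASET_NAME.format(id=0))        # -> "general.dataset.0.name"
```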
@@ -192,50 +229,63 @@ class GGUFType:


class MODEL_ARCH(IntEnum):
-    LLAMA        = auto()
-    FALCON       = auto()
-    BAICHUAN     = auto()
-    GROK         = auto()
-    GPT2         = auto()
-    GPTJ         = auto()
-    GPTNEOX      = auto()
-    MPT          = auto()
-    STARCODER    = auto()
-    REFACT       = auto()
-    BERT         = auto()
-    NOMIC_BERT   = auto()
-    JINA_BERT_V2 = auto()
-    BLOOM        = auto()
-    STABLELM     = auto()
-    QWEN         = auto()
-    QWEN2        = auto()
-    QWEN2MOE     = auto()
-    PHI2         = auto()
-    PHI3         = auto()
-    PLAMO        = auto()
-    CODESHELL    = auto()
-    ORION        = auto()
-    INTERNLM2    = auto()
-    MINICPM      = auto()
-    GEMMA        = auto()
-    GEMMA2       = auto()
-    STARCODER2   = auto()
-    RWKV6        = auto()
-    MAMBA        = auto()
-    XVERSE       = auto()
-    COMMAND_R    = auto()
-    DBRX         = auto()
-    OLMO         = auto()
-    OPENELM      = auto()
-    ARCTIC       = auto()
-    DEEPSEEK2    = auto()
-    CHATGLM      = auto()
-    BITNET       = auto()
-    T5           = auto()
-    T5ENCODER    = auto()
-    JAIS         = auto()
-    NEMOTRON     = auto()
-    EXAONE       = auto()
+    LLAMA            = auto()
+    DECI             = auto()
+    FALCON           = auto()
+    BAICHUAN         = auto()
+    GROK             = auto()
+    GPT2             = auto()
+    GPTJ             = auto()
+    GPTNEOX          = auto()
+    MPT              = auto()
+    STARCODER        = auto()
+    REFACT           = auto()
+    BERT             = auto()
+    NOMIC_BERT       = auto()
+    JINA_BERT_V2     = auto()
+    BLOOM            = auto()
+    STABLELM         = auto()
+    QWEN             = auto()
+    QWEN2            = auto()
+    QWEN2MOE         = auto()
+    QWEN2VL          = auto()
+    PHI2             = auto()
+    PHI3             = auto()
+    PHIMOE           = auto()
+    PLAMO            = auto()
+    CODESHELL        = auto()
+    ORION            = auto()
+    INTERNLM2        = auto()
+    MINICPM          = auto()
+    MINICPM3         = auto()
+    GEMMA            = auto()
+    GEMMA2           = auto()
+    STARCODER2       = auto()
+    RWKV6            = auto()
+    RWKV6QWEN2       = auto()
+    MAMBA            = auto()
+    XVERSE           = auto()
+    COMMAND_R        = auto()
+    COHERE2          = auto()
+    DBRX             = auto()
+    OLMO             = auto()
+    OLMO2            = auto()
+    OLMOE            = auto()
+    OPENELM          = auto()
+    ARCTIC           = auto()
+    DEEPSEEK         = auto()
+    DEEPSEEK2        = auto()
+    CHATGLM          = auto()
+    BITNET           = auto()
+    T5               = auto()
+    T5ENCODER        = auto()
+    JAIS             = auto()
+    NEMOTRON         = auto()
+    EXAONE           = auto()
+    GRANITE          = auto()
+    GRANITE_MOE      = auto()
+    CHAMELEON        = auto()
+    WAVTOKENIZER_DEC = auto()


class MODEL_TENSOR(IntEnum):
@@ -274,6 +324,7 @@ class MODEL_TENSOR(IntEnum):
    FFN_GATE_SHEXP       = auto()
    FFN_DOWN_SHEXP       = auto()
    FFN_UP_SHEXP         = auto()
    FFN_EXP_PROBS_B      = auto()
    ATTN_Q_NORM          = auto()
    ATTN_K_NORM          = auto()
    LAYER_OUT_NORM       = auto()

@@ -291,6 +342,7 @@ class MODEL_TENSOR(IntEnum):
    TIME_MIX_LERP_V      = auto()
    TIME_MIX_LERP_R      = auto()
    TIME_MIX_LERP_G      = auto()
    TIME_MIX_LERP_FUSED  = auto()
    TIME_MIX_LERP_W      = auto()
    TIME_MIX_FIRST       = auto()
    TIME_MIX_DECAY       = auto()
@@ -343,53 +395,84 @@ class MODEL_TENSOR(IntEnum):
    ENC_FFN_DOWN         = auto()
    ENC_FFN_UP           = auto()
    ENC_OUTPUT_NORM      = auto()
    CLS                  = auto() # classifier
    CLS_OUT              = auto() # classifier output projection
    CONV1D               = auto()
    CONVNEXT_DW          = auto()
    CONVNEXT_NORM        = auto()
    CONVNEXT_PW1         = auto()
    CONVNEXT_PW2         = auto()
    CONVNEXT_GAMMA       = auto()
    POSNET_CONV1         = auto()
    POSNET_CONV2         = auto()
    POSNET_NORM          = auto()
    POSNET_NORM1         = auto()
    POSNET_NORM2         = auto()
    POSNET_ATTN_NORM     = auto()
    POSNET_ATTN_Q        = auto()
    POSNET_ATTN_K        = auto()
    POSNET_ATTN_V        = auto()
    POSNET_ATTN_OUT      = auto()


MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
-    MODEL_ARCH.LLAMA:          "llama",
-    MODEL_ARCH.FALCON:         "falcon",
-    MODEL_ARCH.BAICHUAN:       "baichuan",
-    MODEL_ARCH.GROK:           "grok",
-    MODEL_ARCH.GPT2:           "gpt2",
-    MODEL_ARCH.GPTJ:           "gptj",
-    MODEL_ARCH.GPTNEOX:        "gptneox",
-    MODEL_ARCH.MPT:            "mpt",
-    MODEL_ARCH.STARCODER:      "starcoder",
-    MODEL_ARCH.REFACT:         "refact",
-    MODEL_ARCH.BERT:           "bert",
-    MODEL_ARCH.NOMIC_BERT:     "nomic-bert",
-    MODEL_ARCH.JINA_BERT_V2:   "jina-bert-v2",
-    MODEL_ARCH.BLOOM:          "bloom",
-    MODEL_ARCH.STABLELM:       "stablelm",
-    MODEL_ARCH.QWEN:           "qwen",
-    MODEL_ARCH.QWEN2:          "qwen2",
-    MODEL_ARCH.QWEN2MOE:       "qwen2moe",
-    MODEL_ARCH.PHI2:           "phi2",
-    MODEL_ARCH.PHI3:           "phi3",
-    MODEL_ARCH.PLAMO:          "plamo",
-    MODEL_ARCH.CODESHELL:      "codeshell",
-    MODEL_ARCH.ORION:          "orion",
-    MODEL_ARCH.INTERNLM2:      "internlm2",
-    MODEL_ARCH.MINICPM:        "minicpm",
-    MODEL_ARCH.GEMMA:          "gemma",
-    MODEL_ARCH.GEMMA2:         "gemma2",
-    MODEL_ARCH.STARCODER2:     "starcoder2",
-    MODEL_ARCH.RWKV6:          "rwkv6",
-    MODEL_ARCH.MAMBA:          "mamba",
-    MODEL_ARCH.XVERSE:         "xverse",
-    MODEL_ARCH.COMMAND_R:      "command-r",
-    MODEL_ARCH.DBRX:           "dbrx",
-    MODEL_ARCH.OLMO:           "olmo",
-    MODEL_ARCH.OPENELM:        "openelm",
-    MODEL_ARCH.ARCTIC:         "arctic",
-    MODEL_ARCH.DEEPSEEK2:      "deepseek2",
-    MODEL_ARCH.CHATGLM:        "chatglm",
-    MODEL_ARCH.BITNET:         "bitnet",
-    MODEL_ARCH.T5:             "t5",
-    MODEL_ARCH.T5ENCODER:      "t5encoder",
-    MODEL_ARCH.JAIS:           "jais",
-    MODEL_ARCH.NEMOTRON:       "nemotron",
-    MODEL_ARCH.EXAONE:         "exaone",
+    MODEL_ARCH.LLAMA:            "llama",
+    MODEL_ARCH.DECI:             "deci",
+    MODEL_ARCH.FALCON:           "falcon",
+    MODEL_ARCH.BAICHUAN:         "baichuan",
+    MODEL_ARCH.GROK:             "grok",
+    MODEL_ARCH.GPT2:             "gpt2",
+    MODEL_ARCH.GPTJ:             "gptj",
+    MODEL_ARCH.GPTNEOX:          "gptneox",
+    MODEL_ARCH.MPT:              "mpt",
+    MODEL_ARCH.STARCODER:        "starcoder",
+    MODEL_ARCH.REFACT:           "refact",
+    MODEL_ARCH.BERT:             "bert",
+    MODEL_ARCH.NOMIC_BERT:       "nomic-bert",
+    MODEL_ARCH.JINA_BERT_V2:     "jina-bert-v2",
+    MODEL_ARCH.BLOOM:            "bloom",
+    MODEL_ARCH.STABLELM:         "stablelm",
+    MODEL_ARCH.QWEN:             "qwen",
+    MODEL_ARCH.QWEN2:            "qwen2",
+    MODEL_ARCH.QWEN2MOE:         "qwen2moe",
+    MODEL_ARCH.QWEN2VL:          "qwen2vl",
+    MODEL_ARCH.PHI2:             "phi2",
+    MODEL_ARCH.PHI3:             "phi3",
+    MODEL_ARCH.PHIMOE:           "phimoe",
+    MODEL_ARCH.PLAMO:            "plamo",
+    MODEL_ARCH.CODESHELL:        "codeshell",
+    MODEL_ARCH.ORION:            "orion",
+    MODEL_ARCH.INTERNLM2:        "internlm2",
+    MODEL_ARCH.MINICPM:          "minicpm",
+    MODEL_ARCH.MINICPM3:         "minicpm3",
+    MODEL_ARCH.GEMMA:            "gemma",
+    MODEL_ARCH.GEMMA2:           "gemma2",
+    MODEL_ARCH.STARCODER2:       "starcoder2",
+    MODEL_ARCH.RWKV6:            "rwkv6",
+    MODEL_ARCH.RWKV6QWEN2:       "rwkv6qwen2",
+    MODEL_ARCH.MAMBA:            "mamba",
+    MODEL_ARCH.XVERSE:           "xverse",
+    MODEL_ARCH.COMMAND_R:        "command-r",
+    MODEL_ARCH.COHERE2:          "cohere2",
+    MODEL_ARCH.DBRX:             "dbrx",
+    MODEL_ARCH.OLMO:             "olmo",
+    MODEL_ARCH.OLMO2:            "olmo2",
+    MODEL_ARCH.OLMOE:            "olmoe",
+    MODEL_ARCH.OPENELM:          "openelm",
+    MODEL_ARCH.ARCTIC:           "arctic",
+    MODEL_ARCH.DEEPSEEK:         "deepseek",
+    MODEL_ARCH.DEEPSEEK2:        "deepseek2",
+    MODEL_ARCH.CHATGLM:          "chatglm",
+    MODEL_ARCH.BITNET:           "bitnet",
+    MODEL_ARCH.T5:               "t5",
+    MODEL_ARCH.T5ENCODER:        "t5encoder",
+    MODEL_ARCH.JAIS:             "jais",
+    MODEL_ARCH.NEMOTRON:         "nemotron",
+    MODEL_ARCH.EXAONE:           "exaone",
+    MODEL_ARCH.GRANITE:          "granite",
+    MODEL_ARCH.GRANITE_MOE:      "granitemoe",
+    MODEL_ARCH.CHAMELEON:        "chameleon",
+    MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
}
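Every `MODEL_ARCH` member needs a matching entry here; the mapped string is what identifies the architecture inside a GGUF file. A quick sanity-check sketch (assuming the package is importable as `gguf`):

```python
from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES

print(MODEL_ARCH_NAMES[MODEL_ARCH.DECI])              # -> "deci"
print(MODEL_ARCH_NAMES[MODEL_ARCH.WAVTOKENIZER_DEC])  # -> "wavtokenizer-dec"
```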
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -430,6 +513,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
    MODEL_TENSOR.FFN_GATE_EXP:              "blk.{bid}.ffn_gate_exps",
    MODEL_TENSOR.FFN_DOWN_EXP:              "blk.{bid}.ffn_down_exps",
    MODEL_TENSOR.FFN_UP_EXP:                "blk.{bid}.ffn_up_exps",
    MODEL_TENSOR.FFN_EXP_PROBS_B:           "blk.{bid}.exp_probs_b",
    MODEL_TENSOR.LAYER_OUT_NORM:            "blk.{bid}.layer_output_norm",
    MODEL_TENSOR.SSM_IN:                    "blk.{bid}.ssm_in",
    MODEL_TENSOR.SSM_CONV1D:                "blk.{bid}.ssm_conv1d",

@@ -445,6 +529,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
    MODEL_TENSOR.TIME_MIX_LERP_V:           "blk.{bid}.time_mix_lerp_v",
    MODEL_TENSOR.TIME_MIX_LERP_R:           "blk.{bid}.time_mix_lerp_r",
    MODEL_TENSOR.TIME_MIX_LERP_G:           "blk.{bid}.time_mix_lerp_g",
    MODEL_TENSOR.TIME_MIX_LERP_FUSED:       "blk.{bid}.time_mix_lerp_fused",
    MODEL_TENSOR.TIME_MIX_LERP_W:           "blk.{bid}.time_mix_lerp_w",
    MODEL_TENSOR.TIME_MIX_FIRST:            "blk.{bid}.time_mix_first",
    MODEL_TENSOR.TIME_MIX_DECAY:            "blk.{bid}.time_mix_decay",

@@ -497,6 +582,24 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
    MODEL_TENSOR.ENC_FFN_DOWN:              "enc.blk.{bid}.ffn_down",
    MODEL_TENSOR.ENC_FFN_UP:                "enc.blk.{bid}.ffn_up",
    MODEL_TENSOR.ENC_OUTPUT_NORM:           "enc.output_norm",
    MODEL_TENSOR.CLS:                       "cls",
    MODEL_TENSOR.CLS_OUT:                   "cls.output",
    MODEL_TENSOR.CONV1D:                    "conv1d",
    MODEL_TENSOR.CONVNEXT_DW:               "convnext.{bid}.dw",
    MODEL_TENSOR.CONVNEXT_NORM:             "convnext.{bid}.norm",
    MODEL_TENSOR.CONVNEXT_PW1:              "convnext.{bid}.pw1",
    MODEL_TENSOR.CONVNEXT_PW2:              "convnext.{bid}.pw2",
    MODEL_TENSOR.CONVNEXT_GAMMA:            "convnext.{bid}.gamma",
    MODEL_TENSOR.POSNET_CONV1:              "posnet.{bid}.conv1",
    MODEL_TENSOR.POSNET_CONV2:              "posnet.{bid}.conv2",
    MODEL_TENSOR.POSNET_NORM:               "posnet.{bid}.norm",
    MODEL_TENSOR.POSNET_NORM1:              "posnet.{bid}.norm1",
    MODEL_TENSOR.POSNET_NORM2:              "posnet.{bid}.norm2",
    MODEL_TENSOR.POSNET_ATTN_NORM:          "posnet.{bid}.attn_norm",
    MODEL_TENSOR.POSNET_ATTN_Q:             "posnet.{bid}.attn_q",
    MODEL_TENSOR.POSNET_ATTN_K:             "posnet.{bid}.attn_k",
    MODEL_TENSOR.POSNET_ATTN_V:             "posnet.{bid}.attn_v",
    MODEL_TENSOR.POSNET_ATTN_OUT:           "posnet.{bid}.attn_output",
}

MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -520,6 +623,26 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
    ],
    MODEL_ARCH.DECI: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.ATTN_ROT_EMBD,
        MODEL_TENSOR.FFN_GATE_INP,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
        MODEL_TENSOR.FFN_GATE_EXP,
        MODEL_TENSOR.FFN_DOWN_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
    ],
    MODEL_ARCH.GROK: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,

@@ -606,6 +729,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
        MODEL_TENSOR.LAYER_OUT_NORM,
        MODEL_TENSOR.CLS,
        MODEL_TENSOR.CLS_OUT,
    ],
    MODEL_ARCH.NOMIC_BERT: [
        MODEL_TENSOR.TOKEN_EMBD,

@@ -637,6 +762,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.LAYER_OUT_NORM,
        MODEL_TENSOR.CLS,
    ],
    MODEL_ARCH.MPT: [
        MODEL_TENSOR.TOKEN_EMBD,

@@ -723,6 +849,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.QWEN2: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.QWEN2VL: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,

@@ -800,6 +941,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ROPE_FACTORS_LONG,
        MODEL_TENSOR.ROPE_FACTORS_SHORT,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_QKV,
        MODEL_TENSOR.ATTN_Q,

@@ -810,6 +953,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.PHIMOE: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ROPE_FACTORS_LONG,
        MODEL_TENSOR.ROPE_FACTORS_SHORT,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_QKV,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE_INP,
        MODEL_TENSOR.FFN_GATE_EXP,
        MODEL_TENSOR.FFN_DOWN_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
    ],
    MODEL_ARCH.CODESHELL: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.POS_EMBD,

@@ -859,6 +1020,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ROPE_FACTORS_LONG,
        MODEL_TENSOR.ROPE_FACTORS_SHORT,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,

@@ -874,6 +1037,25 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
    ],
    MODEL_ARCH.MINICPM3: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ROPE_FACTORS_LONG,
        MODEL_TENSOR.ROPE_FACTORS_SHORT,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q_A,
        MODEL_TENSOR.ATTN_Q_B,
        MODEL_TENSOR.ATTN_KV_A_MQA,
        MODEL_TENSOR.ATTN_KV_B,
        MODEL_TENSOR.ATTN_Q_A_NORM,
        MODEL_TENSOR.ATTN_KV_A_NORM,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.GEMMA: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,

@@ -932,6 +1114,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.TIME_MIX_LERP_R,
        MODEL_TENSOR.TIME_MIX_LERP_G,
        MODEL_TENSOR.TIME_MIX_LERP_W,
        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
        MODEL_TENSOR.TIME_MIX_FIRST,
        MODEL_TENSOR.TIME_MIX_DECAY,
        MODEL_TENSOR.TIME_MIX_DECAY_W1,

@@ -948,6 +1131,35 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
        MODEL_TENSOR.CHANNEL_MIX_VALUE,
    ],
    MODEL_ARCH.RWKV6QWEN2: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.TIME_MIX_W1,
        MODEL_TENSOR.TIME_MIX_W2,
        MODEL_TENSOR.TIME_MIX_LERP_X,
        MODEL_TENSOR.TIME_MIX_LERP_K,
        MODEL_TENSOR.TIME_MIX_LERP_V,
        MODEL_TENSOR.TIME_MIX_LERP_R,
        MODEL_TENSOR.TIME_MIX_LERP_G,
        MODEL_TENSOR.TIME_MIX_LERP_W,
        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
        MODEL_TENSOR.TIME_MIX_FIRST,
        MODEL_TENSOR.TIME_MIX_DECAY,
        MODEL_TENSOR.TIME_MIX_DECAY_W1,
        MODEL_TENSOR.TIME_MIX_DECAY_W2,
        MODEL_TENSOR.TIME_MIX_KEY,
        MODEL_TENSOR.TIME_MIX_VALUE,
        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
        MODEL_TENSOR.TIME_MIX_GATE,
        MODEL_TENSOR.TIME_MIX_LN,
        MODEL_TENSOR.TIME_MIX_OUTPUT,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.MAMBA: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,

@@ -991,6 +1203,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.ATTN_K_NORM,
        MODEL_TENSOR.ATTN_Q_NORM,
    ],
    MODEL_ARCH.COHERE2: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.DBRX: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,

@@ -1015,6 +1239,39 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.OLMO2: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.ATTN_POST_NORM,
        MODEL_TENSOR.ATTN_Q_NORM,
        MODEL_TENSOR.ATTN_K_NORM,
        MODEL_TENSOR.FFN_POST_NORM,
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.OLMOE: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q_NORM,
        MODEL_TENSOR.ATTN_K_NORM,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE_INP,
        MODEL_TENSOR.FFN_GATE_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
        MODEL_TENSOR.FFN_DOWN_EXP,
    ],
    MODEL_ARCH.OPENELM: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,

@@ -1049,6 +1306,29 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
    ],
    MODEL_ARCH.DEEPSEEK: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.ATTN_ROT_EMBD,
        MODEL_TENSOR.FFN_GATE_INP,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
        MODEL_TENSOR.FFN_GATE_EXP,
        MODEL_TENSOR.FFN_DOWN_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
        MODEL_TENSOR.FFN_GATE_SHEXP,
        MODEL_TENSOR.FFN_DOWN_SHEXP,
        MODEL_TENSOR.FFN_UP_SHEXP,
    ],
    MODEL_ARCH.DEEPSEEK2: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,

@@ -1075,6 +1355,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_GATE_SHEXP,
        MODEL_TENSOR.FFN_DOWN_SHEXP,
        MODEL_TENSOR.FFN_UP_SHEXP,
        MODEL_TENSOR.FFN_EXP_PROBS_B,
    ],
    MODEL_ARCH.CHATGLM : [
        MODEL_TENSOR.TOKEN_EMBD,
@@ -1193,6 +1474,73 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.GRANITE: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.GRANITE_MOE: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE_INP,
        MODEL_TENSOR.FFN_GATE_EXP,
        MODEL_TENSOR.FFN_DOWN_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
    ],
    MODEL_ARCH.CHAMELEON: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_Q_NORM,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_K_NORM,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.WAVTOKENIZER_DEC: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.TOKEN_EMBD_NORM,
        MODEL_TENSOR.CONV1D,
        MODEL_TENSOR.CONVNEXT_DW,
        MODEL_TENSOR.CONVNEXT_NORM,
        MODEL_TENSOR.CONVNEXT_PW1,
        MODEL_TENSOR.CONVNEXT_PW2,
        MODEL_TENSOR.CONVNEXT_GAMMA,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.POSNET_CONV1,
        MODEL_TENSOR.POSNET_CONV2,
        MODEL_TENSOR.POSNET_NORM,
        MODEL_TENSOR.POSNET_NORM1,
        MODEL_TENSOR.POSNET_NORM2,
        MODEL_TENSOR.POSNET_ATTN_NORM,
        MODEL_TENSOR.POSNET_ATTN_Q,
        MODEL_TENSOR.POSNET_ATTN_K,
        MODEL_TENSOR.POSNET_ATTN_V,
        MODEL_TENSOR.POSNET_ATTN_OUT,
    ],
    # TODO
}
@@ -1202,6 +1550,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_ROT_EMBD,
    ],
    MODEL_ARCH.DECI: [
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_ROT_EMBD,
    ],
    MODEL_ARCH.BAICHUAN: [
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_ROT_EMBD,

@@ -1226,6 +1578,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_ROT_EMBD,
    ],
    MODEL_ARCH.DEEPSEEK: [
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_ROT_EMBD,
    ],
    MODEL_ARCH.DEEPSEEK2: [
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_ROT_EMBD,
@@ -1254,9 +1610,10 @@ class TokenType(IntEnum):


class RopeScalingType(Enum):
-    NONE   = 'none'
-    LINEAR = 'linear'
-    YARN   = 'yarn'
+    NONE     = 'none'
+    LINEAR   = 'linear'
+    YARN     = 'yarn'
+    LONGROPE = 'longrope'


class PoolingType(IntEnum):
@@ -1295,13 +1652,15 @@ class GGMLQuantizationType(IntEnum):
    F64     = 28
    IQ1_M   = 29
    BF16    = 30
-    Q4_0_4_4 = 31
-    Q4_0_4_8 = 32
-    Q4_0_8_8 = 33
    TQ1_0   = 34
    TQ2_0   = 35


+class ExpertGatingFuncType(IntEnum):
+    SOFTMAX  = 1
+    SIGMOID  = 2
+
+
# TODO: add GGMLFileType from ggml_ftype in ggml.h
@@ -1341,9 +1700,9 @@ class LlamaFileType(IntEnum):
    MOSTLY_IQ4_XS        = 30  # except 1d tensors
    MOSTLY_IQ1_M         = 31  # except 1d tensors
    MOSTLY_BF16          = 32  # except 1d tensors
-    MOSTLY_Q4_0_4_4      = 33  # except 1d tensors
-    MOSTLY_Q4_0_4_8      = 34  # except 1d tensors
-    MOSTLY_Q4_0_8_8      = 35  # except 1d tensors
+    # MOSTLY_Q4_0_4_4      = 33  # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_4_8      = 34  # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_8_8      = 35  # removed from gguf files, use Q4_0 and runtime repack
    MOSTLY_TQ1_0         = 36  # except 1d tensors
    MOSTLY_TQ2_0         = 37  # except 1d tensors
@@ -1419,9 +1778,6 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
    GGMLQuantizationType.F64:     (1, 8),
    GGMLQuantizationType.IQ1_M:   (256, QK_K // 8 + QK_K // 16  + QK_K // 32),
    GGMLQuantizationType.BF16:    (1, 2),
-    GGMLQuantizationType.Q4_0_4_4:(32, 2 + 16),
-    GGMLQuantizationType.Q4_0_4_8:(32, 2 + 16),
-    GGMLQuantizationType.Q4_0_8_8:(32, 2 + 16),
    GGMLQuantizationType.TQ1_0:   (256, 2 + 4 * 13),
    GGMLQuantizationType.TQ2_0:   (256, 2 + 64),
}
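Each `GGML_QUANT_SIZES` entry is a `(block_size, type_size)` pair, so the on-disk byte count of a quantized tensor follows directly from its element count. A sketch (assuming the element count is a multiple of the block size, which GGUF requires along the innermost dimension):

```python
from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType

block_size, type_size = GGML_QUANT_SIZES[GGMLQuantizationType.TQ2_0]  # (256, 66)
elements = 4096
nbytes = elements // block_size * type_size
print(nbytes)  # 16 blocks * 66 bytes = 1056
```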
@@ -1482,15 +1838,23 @@ KEY_TOKENIZER_SCORES     = Keys.Tokenizer.SCORES
KEY_TOKENIZER_MERGES     = Keys.Tokenizer.MERGES
KEY_TOKENIZER_BOS_ID     = Keys.Tokenizer.BOS_ID
KEY_TOKENIZER_EOS_ID     = Keys.Tokenizer.EOS_ID
+KEY_TOKENIZER_EOT_ID     = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID     = Keys.Tokenizer.EOM_ID
KEY_TOKENIZER_UNK_ID     = Keys.Tokenizer.UNK_ID
KEY_TOKENIZER_SEP_ID     = Keys.Tokenizer.SEP_ID
KEY_TOKENIZER_PAD_ID     = Keys.Tokenizer.PAD_ID
KEY_TOKENIZER_CLS_ID     = Keys.Tokenizer.CLS_ID
KEY_TOKENIZER_MASK_ID    = Keys.Tokenizer.MASK_ID
KEY_TOKENIZER_HF_JSON    = Keys.Tokenizer.HF_JSON
KEY_TOKENIZER_RWKV       = Keys.Tokenizer.RWKV
-KEY_TOKENIZER_PRIFIX_ID  = Keys.Tokenizer.PREFIX_ID
+
+KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
+KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
+KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
+KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
+KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
+KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
+
+# deprecated
+KEY_TOKENIZER_PREFIX_ID  = Keys.Tokenizer.PREFIX_ID
KEY_TOKENIZER_SUFFIX_ID  = Keys.Tokenizer.SUFFIX_ID
KEY_TOKENIZER_MIDDLE_ID  = Keys.Tokenizer.MIDDLE_ID
-KEY_TOKENIZER_EOT_ID     = Keys.Tokenizer.EOT_ID
-KEY_TOKENIZER_EOM_ID     = Keys.Tokenizer.EOM_ID
gguf-py/gguf/gguf_reader.py
@@ -145,11 +145,10 @@ class GGUFReader:
        count = int(count)
        itemsize = int(np.empty([], dtype = dtype).itemsize)
        end_offs = offset + itemsize * count
-        return (
-            self.data[offset:end_offs]
-            .view(dtype = dtype)[:count]
-            .newbyteorder(override_order or self.byte_order)
-        )
+        arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
+        if override_order is None:
+            return arr
+        return arr.view(arr.dtype.newbyteorder(override_order))

    def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
        if field.name in self.fields:
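The reader change above swaps `ndarray.newbyteorder()` (removed in NumPy 2.0) for the still-supported `dtype.newbyteorder()`, and also stops re-applying the file's own byte order to every array when no override is requested. A small sketch of the surviving idiom; the array values here are arbitrary:

```python
import numpy as np

arr = np.arange(4, dtype=np.uint32)
# Reinterpret the same buffer under a byte-swapped dtype; no copy is made.
swapped = arr.view(arr.dtype.newbyteorder("S"))  # "S" swaps the current order
print(swapped.dtype.byteorder, swapped[0])
```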
gguf-py/gguf/gguf_writer.py
@@ -26,6 +26,7 @@ from .constants import (
    RopeScalingType,
    PoolingType,
    TokenType,
    ExpertGatingFuncType,
)

from .quants import quant_shape_from_byte_shape

@@ -568,6 +569,9 @@ class GGUFWriter:
    def add_base_model_organization(self, source_id: int, organization: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)

    def add_base_model_description(self, source_id: int, description: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)

    def add_base_model_url(self, source_id: int, url: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
@@ -580,15 +584,42 @@ class GGUFWriter:
    def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
        self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)

    def add_dataset_count(self, source_count: int) -> None:
        self.add_uint32(Keys.General.DATASET_COUNT, source_count)

    def add_dataset_name(self, source_id: int, name: str) -> None:
        self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)

    def add_dataset_author(self, source_id: int, author: str) -> None:
        self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)

    def add_dataset_version(self, source_id: int, version: str) -> None:
        self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)

    def add_dataset_organization(self, source_id: int, organization: str) -> None:
        self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)

    def add_dataset_description(self, source_id: int, description: str) -> None:
        self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)

    def add_dataset_url(self, source_id: int, url: str) -> None:
        self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)

    def add_dataset_doi(self, source_id: int, doi: str) -> None:
        self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)

    def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
        self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)

    def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
        self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)

    def add_tags(self, tags: Sequence[str]) -> None:
        self.add_array(Keys.General.TAGS, tags)

    def add_languages(self, languages: Sequence[str]) -> None:
        self.add_array(Keys.General.LANGUAGES, languages)

    def add_datasets(self, datasets: Sequence[str]) -> None:
        self.add_array(Keys.General.DATASETS, datasets)

    def add_tensor_data_layout(self, layout: str) -> None:
        self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
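Like the base-model setters above, the new dataset setters just expand the `Keys.General.DATASET_*` templates with a source id. A hypothetical call sequence (file name and values are placeholders):

```python
from gguf import GGUFWriter

w = GGUFWriter("model.gguf", "llama")
w.add_dataset_count(1)
w.add_dataset_name(0, "example-corpus")                      # general.dataset.0.name
w.add_dataset_repo_url(0, "https://example.com/corpus.git")  # general.dataset.0.repo_url
```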
@@ -601,6 +632,21 @@ class GGUFWriter:
 | 
			
		||||
    def add_embedding_length(self, length: int) -> None:
 | 
			
		||||
        self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
 | 
			
		||||
 | 
			
		||||
    def add_features_length(self, length: int) -> None:
 | 
			
		||||
        self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
 | 
			
		||||
 | 
			
		||||
    def add_posnet_embedding_length(self, length: int) -> None:
 | 
			
		||||
        self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
 | 
			
		||||
 | 
			
		||||
    def add_posnet_block_count(self, length: int) -> None:
 | 
			
		||||
        self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
 | 
			
		||||
 | 
			
		||||
    def add_convnext_embedding_length(self, length: int) -> None:
 | 
			
		||||
        self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
 | 
			
		||||
 | 
			
		||||
    def add_convnext_block_count(self, length: int) -> None:
 | 
			
		||||
        self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
 | 
			
		||||
 | 
			
		||||
    def add_block_count(self, length: int) -> None:
 | 
			
		||||
        self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 | 
			
		||||
 | 
			
		||||
@@ -670,6 +716,15 @@ class GGUFWriter:
 | 
			
		||||
    def add_expert_weights_scale(self, value: float) -> None:
 | 
			
		||||
        self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value)
 | 
			
		||||
 | 
			
		||||
    def add_expert_weights_norm(self, value: bool) -> None:
 | 
			
		||||
        self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)
 | 
			
		||||
 | 
			
		||||
    def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
 | 
			
		||||
        self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
 | 
			
		||||
 | 
			
		||||
    def add_swin_norm(self, value: bool) -> None:
 | 
			
		||||
        self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
 | 
			
		||||
 | 
			
		||||
    def add_rescale_every_n_layers(self, count: int) -> None:
 | 
			
		||||
        self.add_uint32(Keys.LLM.RESCALE_EVERY_N_LAYERS.format(arch=self.arch), count)
 | 
			
		||||
 | 
			
		||||
@@ -679,15 +734,30 @@ class GGUFWriter:
    def add_time_decay_extra_dim(self, dim: int) -> None:
        self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)

    def add_residual_scale(self, value: float) -> None:
        self.add_float32(Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch), value)

    def add_embedding_scale(self, value: float) -> None:
        self.add_float32(Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch), value)

    def add_wkv_head_size(self, size: int) -> None:
        self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)

    def add_token_shift_count(self, count: int) -> None:
        self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)

    def add_layer_norm_eps(self, value: float) -> None:
        self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)

    def add_layer_norm_rms_eps(self, value: float) -> None:
        self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)

    def add_group_norm_eps(self, value: float) -> None:
        self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)

    def add_group_norm_groups(self, value: int) -> None:
        self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)

    def add_causal_attention(self, value: bool) -> None:
        self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
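
The RWKV-oriented setters follow the same pattern; `add_wkv_head_size` is the only one writing under the `{arch}.wkv.*` namespace. A hedged sketch with illustrative values, again assuming a `writer` instance:

```python
# Hypothetical RWKV v6 hyperparameters; resulting keys shown in the comments.
writer.add_wkv_head_size(64)          # {arch}.wkv.head_size
writer.add_token_shift_count(2)       # {arch}.token_shift_count
writer.add_rescale_every_n_layers(6)  # {arch}.rescale_every_n_layers
```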
@@ -703,12 +773,18 @@ class GGUFWriter:
    def add_sliding_window(self, value: int) -> None:
        self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)

    def add_attention_scale(self, value: float) -> None:
        self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)

    def add_pooling_type(self, value: PoolingType) -> None:
        self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)

    def add_rope_dimension_count(self, count: int) -> None:
        self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)

    def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
        self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)

    def add_rope_freq_base(self, value: float) -> None:
        self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
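
`add_rope_dimension_sections` writes an array key, used by multi-section RoPE variants (e.g. Qwen2-VL's M-RoPE) to split the rotary dimensions across temporal and spatial axes. A hedged example; the section sizes are illustrative only:

```python
# Hypothetical M-RoPE split into temporal/height/width/extra sections.
writer.add_rope_dimension_count(128)
writer.add_rope_dimension_sections([16, 24, 24, 0])  # -> {arch}.rope.dimension_sections
```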
@@ -781,9 +857,6 @@ class GGUFWriter:
    def add_pad_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.PAD_ID, id)

    def add_cls_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.CLS_ID, id)

    def add_mask_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.MASK_ID, id)
@@ -831,15 +904,6 @@ class GGUFWriter:

        self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)

    def add_prefix_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)

    def add_suffix_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)

    def add_middle_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)

    def add_eot_token_id(self, id: int) -> None:
        self.add_uint32(Keys.Tokenizer.EOT_ID, id)

@@ -41,7 +41,7 @@ class Metadata:
    base_models: Optional[list[dict]] = None
    tags: Optional[list[str]] = None
    languages: Optional[list[str]] = None
    datasets: Optional[list[str]] = None
    datasets: Optional[list[dict]] = None

    @staticmethod
    def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:

@@ -91,9 +91,11 @@ class Metadata:
        # Base Models is received here as an array of models
        metadata.base_models     = metadata_override.get("general.base_models",        metadata.base_models)

        # Datasets is received here as an array of datasets
        metadata.datasets        = metadata_override.get("general.datasets",           metadata.datasets)

        metadata.tags            = metadata_override.get(Keys.General.TAGS,            metadata.tags)
        metadata.languages       = metadata_override.get(Keys.General.LANGUAGES,       metadata.languages)
        metadata.datasets        = metadata_override.get(Keys.General.DATASETS,        metadata.datasets)

        # Direct Metadata Override (via direct cli argument)
        if model_name is not None:
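
Overrides can therefore supply structured dataset entries. A minimal sketch of a metadata override file's content, expressed here as the Python dict that `Metadata.load()` ends up reading; names are illustrative:

```python
# Hypothetical contents of a metadata-override JSON file.
metadata_override = {
    "general.datasets": [
        {
            "name": "OpenHermes 2.5",
            "organization": "Teknium",
            "repo_url": "https://huggingface.co/teknium/OpenHermes-2.5",
        }
    ]
}
```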
@@ -346,12 +348,12 @@ class Metadata:
            use_model_card_metadata("author", "model_creator")
            use_model_card_metadata("basename", "model_type")

            if "base_model" in model_card:
            if "base_model" in model_card or "base_models" in model_card or "base_model_sources" in model_card:
                # This represents the parent models that this is based on
                # Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
                # Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
                metadata_base_models = []
                base_model_value = model_card.get("base_model", None)
                base_model_value = model_card.get("base_model", model_card.get("base_models", model_card.get("base_model_sources", None)))

                if base_model_value is not None:
                    if isinstance(base_model_value, str):
@@ -364,18 +366,106 @@ class Metadata:

                for model_id in metadata_base_models:
                    # NOTE: model size of base model is assumed to be similar to the size of the current model
                    model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
                    base_model = {}
                    if model_full_name_component is not None:
                        base_model["name"] = Metadata.id_to_title(model_full_name_component)
                    if org_component is not None:
                        base_model["organization"] = Metadata.id_to_title(org_component)
                    if version is not None:
                        base_model["version"] = version
                    if org_component is not None and model_full_name_component is not None:
                        base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
                    if isinstance(model_id, str):
                        if model_id.startswith("http://") or model_id.startswith("https://") or model_id.startswith("ssh://"):
                            base_model["repo_url"] = model_id

                            # Check if Hugging Face ID is present in URL
                            if "huggingface.co" in model_id:
                                match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", model_id)
                                if match:
                                    model_id_component = match.group(1)
                                    model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id_component, total_params)

                                    # Populate model dictionary with extracted components
                                    if model_full_name_component is not None:
                                        base_model["name"] = Metadata.id_to_title(model_full_name_component)
                                    if org_component is not None:
                                        base_model["organization"] = Metadata.id_to_title(org_component)
                                    if version is not None:
                                        base_model["version"] = version

                        else:
                            # Likely a Hugging Face ID
                            model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)

                            # Populate model dictionary with extracted components
                            if model_full_name_component is not None:
                                base_model["name"] = Metadata.id_to_title(model_full_name_component)
                            if org_component is not None:
                                base_model["organization"] = Metadata.id_to_title(org_component)
                            if version is not None:
                                base_model["version"] = version
                            if org_component is not None and model_full_name_component is not None:
                                base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"

                    elif isinstance(model_id, dict):
                        base_model = model_id

                    else:
                        logger.error(f"base model entry '{str(model_id)}' not in a known format")

                    metadata.base_models.append(base_model)

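The URL branch above only recognizes bare `https://huggingface.co/<org>/<model>` URLs; anything with extra path segments falls through with just `repo_url` set. A standalone check of that regex:

```python
import re

# Matches bare org/name Hugging Face URLs only; trailing paths fail the `$` anchor.
m = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$",
             "https://huggingface.co/teknium/OpenHermes-2.5")
assert m is not None and m.group(1) == "teknium/OpenHermes-2.5"
assert re.match(r"https?://huggingface.co/([^/]+/[^/]+)$",
                "https://huggingface.co/org/model/tree/main") is None
```
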
            if "datasets" in model_card or "dataset" in model_card or "dataset_sources" in model_card:
 | 
			
		||||
                # This represents the datasets that this was trained from
 | 
			
		||||
                metadata_datasets = []
 | 
			
		||||
                dataset_value = model_card.get("datasets", model_card.get("dataset", model_card.get("dataset_sources", None)))
 | 
			
		||||
 | 
			
		||||
                if dataset_value is not None:
 | 
			
		||||
                    if isinstance(dataset_value, str):
 | 
			
		||||
                        metadata_datasets.append(dataset_value)
 | 
			
		||||
                    elif isinstance(dataset_value, list):
 | 
			
		||||
                        metadata_datasets.extend(dataset_value)
 | 
			
		||||
 | 
			
		||||
                if metadata.datasets is None:
 | 
			
		||||
                    metadata.datasets = []
 | 
			
		||||
 | 
			
		||||
                for dataset_id in metadata_datasets:
 | 
			
		||||
                    # NOTE: model size of base model is assumed to be similar to the size of the current model
 | 
			
		||||
                    dataset = {}
 | 
			
		||||
                    if isinstance(dataset_id, str):
 | 
			
		||||
                        if dataset_id.startswith(("http://", "https://", "ssh://")):
 | 
			
		||||
                            dataset["repo_url"] = dataset_id
 | 
			
		||||
 | 
			
		||||
                            # Check if Hugging Face ID is present in URL
 | 
			
		||||
                            if "huggingface.co" in dataset_id:
 | 
			
		||||
                                match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", dataset_id)
 | 
			
		||||
                                if match:
 | 
			
		||||
                                    dataset_id_component = match.group(1)
 | 
			
		||||
                                    dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id_component, total_params)
 | 
			
		||||
 | 
			
		||||
                                    # Populate dataset dictionary with extracted components
 | 
			
		||||
                                    if dataset_name_component is not None:
 | 
			
		||||
                                        dataset["name"] = Metadata.id_to_title(dataset_name_component)
 | 
			
		||||
                                    if org_component is not None:
 | 
			
		||||
                                        dataset["organization"] = Metadata.id_to_title(org_component)
 | 
			
		||||
                                    if version is not None:
 | 
			
		||||
                                        dataset["version"] = version
 | 
			
		||||
 | 
			
		||||
                        else:
 | 
			
		||||
                            # Likely a Hugging Face ID
 | 
			
		||||
                            dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id, total_params)
 | 
			
		||||
 | 
			
		||||
                            # Populate dataset dictionary with extracted components
 | 
			
		||||
                            if dataset_name_component is not None:
 | 
			
		||||
                                dataset["name"] = Metadata.id_to_title(dataset_name_component)
 | 
			
		||||
                            if org_component is not None:
 | 
			
		||||
                                dataset["organization"] = Metadata.id_to_title(org_component)
 | 
			
		||||
                            if version is not None:
 | 
			
		||||
                                dataset["version"] = version
 | 
			
		||||
                            if org_component is not None and dataset_name_component is not None:
 | 
			
		||||
                                dataset["repo_url"] = f"https://huggingface.co/{org_component}/{dataset_name_component}"
 | 
			
		||||
 | 
			
		||||
                    elif isinstance(dataset_id, dict):
 | 
			
		||||
                        dataset = dataset_id
 | 
			
		||||
 | 
			
		||||
                    else:
 | 
			
		||||
                        logger.error(f"dataset entry '{str(dataset_id)}' not in a known format")
 | 
			
		||||
 | 
			
		||||
                    metadata.datasets.append(dataset)
 | 
			
		||||
 | 
			
		||||
            use_model_card_metadata("license", "license")
 | 
			
		||||
            use_model_card_metadata("license_name", "license_name")
 | 
			
		||||
            use_model_card_metadata("license_link", "license_link")
 | 
			
		||||
@@ -386,9 +476,6 @@ class Metadata:
 | 
			
		||||
            use_array_model_card_metadata("languages", "languages")
 | 
			
		||||
            use_array_model_card_metadata("languages", "language")
 | 
			
		||||
 | 
			
		||||
            use_array_model_card_metadata("datasets", "datasets")
 | 
			
		||||
            use_array_model_card_metadata("datasets", "dataset")
 | 
			
		||||
 | 
			
		||||
        # Hugging Face Parameter Heuristics
 | 
			
		||||
        ####################################
 | 
			
		||||
 | 
			
		||||
@@ -458,7 +545,10 @@ class Metadata:
            gguf_writer.add_size_label(self.size_label)

        if self.license is not None:
            gguf_writer.add_license(self.license)
            if isinstance(self.license, list):
                gguf_writer.add_license(",".join(self.license))
            else:
                gguf_writer.add_license(self.license)
        if self.license_name is not None:
            gguf_writer.add_license_name(self.license_name)
        if self.license_link is not None:
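
A model card may declare multiple licenses; the writer now joins them into a single comma-separated `general.license` value instead of handing the raw list to `add_license`:

```python
# e.g. a model card with license: ["apache-2.0", "mit"]
",".join(["apache-2.0", "mit"])  # -> "apache-2.0,mit"
```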
@@ -493,6 +583,8 @@ class Metadata:
                    gguf_writer.add_base_model_version(key, base_model_entry["version"])
                if "organization" in base_model_entry:
                    gguf_writer.add_base_model_organization(key, base_model_entry["organization"])
                if "description" in base_model_entry:
                    gguf_writer.add_base_model_description(key, base_model_entry["description"])
                if "url" in base_model_entry:
                    gguf_writer.add_base_model_url(key, base_model_entry["url"])
                if "doi" in base_model_entry:
@@ -502,9 +594,29 @@ class Metadata:
                if "repo_url" in base_model_entry:
                    gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"])

        if self.datasets is not None:
            gguf_writer.add_dataset_count(len(self.datasets))
            for key, dataset_entry in enumerate(self.datasets):
                if "name" in dataset_entry:
                    gguf_writer.add_dataset_name(key, dataset_entry["name"])
                if "author" in dataset_entry:
                    gguf_writer.add_dataset_author(key, dataset_entry["author"])
                if "version" in dataset_entry:
                    gguf_writer.add_dataset_version(key, dataset_entry["version"])
                if "organization" in dataset_entry:
                    gguf_writer.add_dataset_organization(key, dataset_entry["organization"])
                if "description" in dataset_entry:
                    gguf_writer.add_dataset_description(key, dataset_entry["description"])
                if "url" in dataset_entry:
                    gguf_writer.add_dataset_url(key, dataset_entry["url"])
                if "doi" in dataset_entry:
                    gguf_writer.add_dataset_doi(key, dataset_entry["doi"])
                if "uuid" in dataset_entry:
                    gguf_writer.add_dataset_uuid(key, dataset_entry["uuid"])
                if "repo_url" in dataset_entry:
                    gguf_writer.add_dataset_repo_url(key, dataset_entry["repo_url"])

        if self.tags is not None:
            gguf_writer.add_tags(self.tags)
        if self.languages is not None:
            gguf_writer.add_languages(self.languages)
        if self.datasets is not None:
            gguf_writer.add_datasets(self.datasets)

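Each dataset entry thus lands under an indexed `general.dataset.{id}.*` key, mirroring the base-model block above it. A hedged sketch of inspecting those keys after conversion; the file path is hypothetical:

```python
# Illustrative: list the per-dataset metadata keys of a converted file.
from gguf import GGUFReader

reader = GGUFReader("model.gguf")
for key in reader.fields:
    if key.startswith("general.dataset."):
        print(key)  # e.g. general.dataset.count, general.dataset.0.name, ...
```
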
@@ -11,8 +11,8 @@ from pathlib import Path
import numpy as np

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))

import gguf

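The extra `.parent` accounts for the scripts moving one directory deeper, from `gguf-py/scripts/` to `gguf-py/gguf/scripts/`; the check still has to land on the repository root to find `gguf-py`. A quick sanity check of the path arithmetic:

```python
from pathlib import Path

# gguf-py/gguf/scripts/gguf_dump.py: three parents up is the gguf-py directory,
# four parents up is the repo root that contains 'gguf-py'.
p = Path("llama.cpp/gguf-py/gguf/scripts/gguf_dump.py")
assert p.parent.parent.parent.name == "gguf-py"
assert p.parent.parent.parent.parent.name == "llama.cpp"
```
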
@@ -12,8 +12,8 @@ from typing import Any
import numpy as np

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from gguf import GGUFReader, GGUFValueType, ReaderTensor  # noqa: E402

@@ -13,8 +13,8 @@ from pathlib import Path
from tqdm import tqdm

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from gguf import GGUFReader  # noqa: E402

@@ -13,8 +13,8 @@ from tqdm import tqdm
from typing import Any, Sequence, NamedTuple

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))

import gguf

@@ -6,8 +6,8 @@ import sys
from pathlib import Path

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from gguf import GGUFReader  # noqa: E402

@@ -13,7 +13,7 @@ class TensorNameMap:
            "transformer.wte",                           # gpt2 gpt-j mpt refact qwen dbrx jais exaone
            "transformer.word_embeddings",               # falcon
            "word_embeddings",                           # bloom
            "model.embed_tokens",                        # llama-hf nemotron
            "model.embed_tokens",                        # llama-hf nemotron olmoe olmo2 rwkv6qwen2
            "tok_embeddings",                            # llama-pth
            "embeddings.word_embeddings",                # bert nomic-bert
            "language_model.embedding.word_embeddings",  # persimmon
@@ -42,6 +42,7 @@ class TensorNameMap:
            "emb_ln",                     # nomic-bert
            "transformer.norm",           # openelm
            "rwkv.blocks.0.pre_ln",       # rwkv
            "backbone.norm",              # wavtokenizer
        ),

        # Position embeddings
@@ -54,19 +55,20 @@ class TensorNameMap:
        # Output
        MODEL_TENSOR.OUTPUT: (
            "embed_out",                 # gptneox
            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone
            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
            "output",                    # llama-pth bloom internlm2
            "word_embeddings_for_head",  # persimmon
            "lm_head.linear",            # phi2
            "output_layer",              # chatglm
            "head",                      # rwkv
            "head.out",                  # wavtokenizer
        ),

        # Output norm
        MODEL_TENSOR.OUTPUT_NORM: (
            "gpt_neox.final_layer_norm",               # gptneox
            "transformer.ln_f",                        # gpt2 gpt-j falcon jais exaone
            "model.norm",                              # llama-hf baichuan internlm2
            "model.norm",                              # llama-hf baichuan internlm2 olmoe olmo2 phimoe
            "norm",                                    # llama-pth
            "transformer.norm_f",                      # mpt dbrx
            "ln_f",                                    # refact bloom qwen gpt2
@@ -80,6 +82,7 @@ class TensorNameMap:
            "transformer.norm",                        # openelm
            "model.norm",                              # nemotron
            "rwkv.ln_out",                             # rwkv
            "backbone.final_layer_norm",               # wavtokenizer
        ),

        # Rope frequencies
@@ -87,6 +90,13 @@ class TensorNameMap:
            "rope.freqs",  # llama-pth
            "rotary_pos_emb.inv_freq",  # chatglm
        ),

        MODEL_TENSOR.ROPE_FACTORS_LONG: (),
        MODEL_TENSOR.ROPE_FACTORS_SHORT: (),

        MODEL_TENSOR.CONV1D: (
            "backbone.embed", # roberta
        ),
    }

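These per-architecture alias tuples are what the converters consult when renaming checkpoint tensors. A hedged usage sketch, assuming the `gguf` package is importable:

```python
import gguf

# Build the map for LLaMA with 32 blocks and resolve one HF tensor name.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, 32)
name = tmap.get_name("model.layers.0.self_attn.q_proj.weight",
                     try_suffixes=(".weight", ".bias"))
print(name)  # expected: blk.0.attn_q.weight
```
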
    block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
@@ -98,7 +108,7 @@ class TensorNameMap:
            "transformer.h.{bid}.input_layernorm",                  # falcon7b
            "h.{bid}.input_layernorm",                              # bloom
            "transformer.h.{bid}.ln_mlp",                           # falcon40b
            "model.layers.{bid}.input_layernorm",                   # llama-hf nemotron
            "model.layers.{bid}.input_layernorm",                   # llama-hf nemotron olmoe phimoe
            "layers.{bid}.attention_norm",                          # llama-pth
            "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
            "model.layers.{bid}.ln1",                               # yi
@@ -142,7 +152,8 @@ class TensorNameMap:

        # Attention query
        MODEL_TENSOR.ATTN_Q: (
            "model.layers.{bid}.self_attn.q_proj",                       # llama-hf nemotron
            "model.layers.{bid}.self_attn.q_proj",                       # llama-hf nemotron olmoe olmo2 phimoe
            "model.layers.{bid}.self_attn.q_proj_no_perm",               # llama-custom
            "layers.{bid}.attention.wq",                                 # llama-pth
            "encoder.layer.{bid}.attention.self.query",                  # bert
            "transformer.h.{bid}.attn.q_proj",                           # gpt-j
@@ -154,7 +165,8 @@ class TensorNameMap:

        # Attention key
        MODEL_TENSOR.ATTN_K: (
            "model.layers.{bid}.self_attn.k_proj",                     # llama-hf nemotron
            "model.layers.{bid}.self_attn.k_proj",                     # llama-hf nemotron olmoe olmo2 phimoe
            "model.layers.{bid}.self_attn.k_proj_no_perm",             # llama-custom
            "layers.{bid}.attention.wk",                               # llama-pth
            "encoder.layer.{bid}.attention.self.key",                  # bert
            "transformer.h.{bid}.attn.k_proj",                         # gpt-j
@@ -167,7 +179,7 @@ class TensorNameMap:

        # Attention value
        MODEL_TENSOR.ATTN_V: (
            "model.layers.{bid}.self_attn.v_proj",                       # llama-hf nemotron
            "model.layers.{bid}.self_attn.v_proj",                       # llama-hf nemotron olmoe olmo2 phimoe
            "layers.{bid}.attention.wv",                                 # llama-pth
            "encoder.layer.{bid}.attention.self.value",                  # bert
            "transformer.h.{bid}.attn.v_proj",                           # gpt-j
@@ -185,7 +197,8 @@ class TensorNameMap:
            "transformer.blocks.{bid}.attn.out_proj",                       # mpt
            "transformer.h.{bid}.self_attention.dense",                     # falcon
            "h.{bid}.self_attention.dense",                                 # bloom
            "model.layers.{bid}.self_attn.o_proj",                          # llama-hf nemotron
            "model.layers.{bid}.self_attn.o_proj",                          # llama-hf nemotron olmoe olmo2 phimoe
            "model.layers.{bid}.self_attn.linear_attn",                     # deci
            "layers.{bid}.attention.wo",                                    # llama-pth
            "encoder.layer.{bid}.attention.output.dense",                   # bert
            "transformer.h.{bid}.attn.out_proj",                            # gpt-j
@@ -212,7 +225,7 @@ class TensorNameMap:
        ),

        MODEL_TENSOR.ATTN_POST_NORM: (
            "model.layers.{bid}.post_attention_layernorm",     # gemma2
            "model.layers.{bid}.post_attention_layernorm",     # gemma2 olmo2
        ),

        # Rotary embeddings
@@ -229,7 +242,7 @@ class TensorNameMap:
            "transformer.h.{bid}.ln_2",                                      # gpt2 refact qwen jais exaone
            "h.{bid}.post_attention_layernorm",                              # bloom
            "transformer.blocks.{bid}.norm_2",                               # mpt
            "model.layers.{bid}.post_attention_layernorm",                   # llama-hf nemotron
            "model.layers.{bid}.post_attention_layernorm",                   # llama-hf nemotron olmoe phimoe
            "layers.{bid}.ffn_norm",                                         # llama-pth
            "language_model.encoder.layers.{bid}.post_attention_layernorm",  # persimmon
            "model.layers.{bid}.ln2",                                        # yi
@@ -247,21 +260,26 @@ class TensorNameMap:

        # Post feed-forward norm
        MODEL_TENSOR.FFN_POST_NORM: (
            "model.layers.{bid}.post_feedforward_layernorm", # gemma2
            "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
        ),

        MODEL_TENSOR.FFN_GATE_INP: (
            "layers.{bid}.feed_forward.gate",             # mixtral
            "model.layers.{bid}.block_sparse_moe.gate",   # mixtral
            "model.layers.{bid}.mlp.gate",                # qwen2moe
            "transformer.decoder_layer.{bid}.router",     # Grok
            "transformer.blocks.{bid}.ffn.router.layer",  # dbrx
            "layers.{bid}.feed_forward.gate",                   # mixtral
            "model.layers.{bid}.block_sparse_moe.gate",         # mixtral phimoe
            "model.layers.{bid}.mlp.gate",                      # qwen2moe olmoe
            "transformer.decoder_layer.{bid}.router",           # Grok
            "transformer.blocks.{bid}.ffn.router.layer",        # dbrx
            "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
        ),

        MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
            "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
        ),

        MODEL_TENSOR.FFN_EXP_PROBS_B: (
            "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
        ),

        # Feed-forward up
        MODEL_TENSOR.FFN_UP: (
            "gpt_neox.layers.{bid}.mlp.dense_h_to_4h",                # gptneox
@@ -269,7 +287,7 @@ class TensorNameMap:
            "transformer.blocks.{bid}.ffn.up_proj",                   # mpt
            "transformer.h.{bid}.mlp.dense_h_to_4h",                  # falcon
            "h.{bid}.mlp.dense_h_to_4h",                              # bloom
            "model.layers.{bid}.mlp.up_proj",                         # llama-hf refact nemotron
            "model.layers.{bid}.mlp.up_proj",                         # llama-hf refact nemotron olmo2
            "layers.{bid}.feed_forward.w3",                           # llama-pth
            "encoder.layer.{bid}.intermediate.dense",                 # bert
            "transformer.h.{bid}.mlp.fc_in",                          # gpt-j
@@ -292,15 +310,16 @@ class TensorNameMap:
        ),

        MODEL_TENSOR.FFN_UP_EXP: (
            "layers.{bid}.feed_forward.experts.w3",          # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear_v",  # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.v1",   # dbrx
            "model.layers.{bid}.mlp.experts.up_proj",        # qwen2moe (merged)
            "layers.{bid}.feed_forward.experts.w3",           # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear_v",   # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.v1",    # dbrx
            "model.layers.{bid}.mlp.experts.up_proj",         # qwen2moe olmoe (merged)
            "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
        ),

        MODEL_TENSOR.FFN_UP_SHEXP: (
            "model.layers.{bid}.mlp.shared_expert.up_proj",  # qwen2moe
            "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
            "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
        ),

        # AWQ-activation gate
@@ -310,7 +329,7 @@ class TensorNameMap:

        # Feed-forward gate
        MODEL_TENSOR.FFN_GATE: (
            "model.layers.{bid}.mlp.gate_proj",           # llama-hf refact
            "model.layers.{bid}.mlp.gate_proj",           # llama-hf refact olmo2
            "layers.{bid}.feed_forward.w1",               # llama-pth
            "transformer.h.{bid}.mlp.w2",                 # qwen
            "transformer.h.{bid}.mlp.c_fc2",              # jais
@@ -324,15 +343,16 @@ class TensorNameMap:
        ),

        MODEL_TENSOR.FFN_GATE_EXP: (
            "layers.{bid}.feed_forward.experts.w1",         # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear",   # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.w1",  # dbrx
            "model.layers.{bid}.mlp.experts.gate_proj",     # qwen2moe (merged)
            "layers.{bid}.feed_forward.experts.w1",           # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear",     # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.w1",    # dbrx
            "model.layers.{bid}.mlp.experts.gate_proj",       # qwen2moe olmoe (merged)
            "model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
        ),

        MODEL_TENSOR.FFN_GATE_SHEXP: (
            "model.layers.{bid}.mlp.shared_expert.gate_proj",  # qwen2moe
            "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
            "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
        ),

        # Feed-forward down
@@ -342,7 +362,7 @@ class TensorNameMap:
            "transformer.blocks.{bid}.ffn.down_proj",                 # mpt
            "transformer.h.{bid}.mlp.dense_4h_to_h",                  # falcon
            "h.{bid}.mlp.dense_4h_to_h",                              # bloom
            "model.layers.{bid}.mlp.down_proj",                       # llama-hf nemotron
            "model.layers.{bid}.mlp.down_proj",                       # llama-hf nemotron olmo2
            "layers.{bid}.feed_forward.w2",                           # llama-pth
            "encoder.layer.{bid}.output.dense",                       # bert
            "transformer.h.{bid}.mlp.fc_out",                         # gpt-j
@@ -364,21 +384,23 @@ class TensorNameMap:
        ),

        MODEL_TENSOR.FFN_DOWN_EXP: (
            "layers.{bid}.feed_forward.experts.w2",          # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear_1",  # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.w2",   # dbrx
            "model.layers.{bid}.mlp.experts.down_proj",      # qwen2moe (merged)
            "layers.{bid}.feed_forward.experts.w2",              # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear_1",      # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.w2",       # dbrx
            "model.layers.{bid}.mlp.experts.down_proj",          # qwen2moe olmoe (merged)
            "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
            "model.layers.{bid}.block_sparse_moe.experts.w2",    # phimoe (merged)
        ),

        MODEL_TENSOR.FFN_DOWN_SHEXP: (
            "model.layers.{bid}.mlp.shared_expert.down_proj",  # qwen2moe
            "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
            "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
        ),

        MODEL_TENSOR.ATTN_Q_NORM: (
            "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
            "model.layers.{bid}.self_attn.q_layernorm",                       # persimmon
            "model.layers.{bid}.self_attn.q_norm",                            # cohere
            "model.layers.{bid}.self_attn.q_norm",                            # cohere olmoe chameleon olmo2
            "transformer.blocks.{bid}.attn.q_ln",                             # sea-lion
            "encoder.layer.{bid}.attention.self.layer_norm_q",                # jina-bert-v2
            "transformer.layers.{bid}.attn.q_norm",                           # openelm
@@ -387,7 +409,7 @@ class TensorNameMap:
        MODEL_TENSOR.ATTN_K_NORM: (
            "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
            "model.layers.{bid}.self_attn.k_layernorm",                       # persimmon
            "model.layers.{bid}.self_attn.k_norm",                            # cohere
            "model.layers.{bid}.self_attn.k_norm",                            # cohere olmoe chameleon olmo2
            "transformer.blocks.{bid}.attn.k_ln",                             # sea-lion
            "encoder.layer.{bid}.attention.self.layer_norm_k",                # jina-bert-v2
            "transformer.layers.{bid}.attn.k_norm",                           # openelm
@@ -442,34 +464,42 @@ class TensorNameMap:

        MODEL_TENSOR.TIME_MIX_W1: (
            "rwkv.blocks.{bid}.attention.time_maa_w1",  # rwkv v6
            "model.layers.{bid}.self_attn.time_maa_w1", # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_W2: (
            "rwkv.blocks.{bid}.attention.time_maa_w2",  # rwkv v6
            "model.layers.{bid}.self_attn.time_maa_w2", # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_LERP_X: (
            "rwkv.blocks.{bid}.attention.time_maa_x",   # rwkv v6
            "model.layers.{bid}.self_attn.time_maa_x",  # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_LERP_K: (
            "rwkv.blocks.{bid}.attention.time_maa_k",   # rwkv v6
            "model.layers.{bid}.self_attn.time_maa_k",  # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_LERP_V: (
            "rwkv.blocks.{bid}.attention.time_maa_v",   # rwkv v6
            "model.layers.{bid}.self_attn.time_maa_v",  # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_LERP_R: (
            "rwkv.blocks.{bid}.attention.time_maa_r",   # rwkv v6
            "model.layers.{bid}.self_attn.time_maa_r",  # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_LERP_G: (
            "rwkv.blocks.{bid}.attention.time_maa_g",   # rwkv v6
            "model.layers.{bid}.self_attn.time_maa_g",  # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_LERP_W: (
            "rwkv.blocks.{bid}.attention.time_maa_w",   # rwkv v6
            "model.layers.{bid}.self_attn.time_maa_w",  # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_FIRST: (
@@ -478,30 +508,37 @@ class TensorNameMap:

        MODEL_TENSOR.TIME_MIX_DECAY: (
            "rwkv.blocks.{bid}.attention.time_decay",   # rwkv v6
            "model.layers.{bid}.self_attn.time_decay",  # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_DECAY_W1: (
            "rwkv.blocks.{bid}.attention.time_decay_w1",  # rwkv v6
            "model.layers.{bid}.self_attn.time_decay_w1", # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_DECAY_W2: (
            "rwkv.blocks.{bid}.attention.time_decay_w2",  # rwkv v6
            "model.layers.{bid}.self_attn.time_decay_w2", # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_KEY: (
            "rwkv.blocks.{bid}.attention.key", # rwkv
            "rwkv.blocks.{bid}.attention.key",     # rwkv
            "model.layers.{bid}.self_attn.k_proj", # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_VALUE: (
            "rwkv.blocks.{bid}.attention.value", # rwkv
            "rwkv.blocks.{bid}.attention.value",   # rwkv
            "model.layers.{bid}.self_attn.v_proj", # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
            "rwkv.blocks.{bid}.attention.receptance", # rwkv
            "model.layers.{bid}.self_attn.q_proj",    # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_GATE: (
            "rwkv.blocks.{bid}.attention.gate", # rwkv
            "rwkv.blocks.{bid}.attention.gate",  # rwkv
            "model.layers.{bid}.self_attn.gate", # rwkv6qwen2
        ),

        MODEL_TENSOR.TIME_MIX_LN: (
@@ -509,7 +546,8 @@ class TensorNameMap:
        ),

        MODEL_TENSOR.TIME_MIX_OUTPUT: (
            "rwkv.blocks.{bid}.attention.output", # rwkv
            "rwkv.blocks.{bid}.attention.output",  # rwkv
            "model.layers.{bid}.self_attn.o_proj", # rwkv6qwen2
        ),

        MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
@@ -674,9 +712,81 @@ class TensorNameMap:
            "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
        ),

        ############################################################################
        # TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
        MODEL_TENSOR.ENC_OUTPUT_NORM: (
            "encoder.final_layer_norm", # t5
        ),

        MODEL_TENSOR.CLS: (
            "classifier",       # jina
            "classifier.dense", # roberta
        ),

        MODEL_TENSOR.CLS_OUT: (
            "classifier.out_proj", # roberta
        ),
        #############################################################################

        MODEL_TENSOR.CONVNEXT_DW: (
            "backbone.convnext.{bid}.dwconv", # wavtokenizer
        ),

        MODEL_TENSOR.CONVNEXT_NORM: (
            "backbone.convnext.{bid}.norm", # wavtokenizer
        ),

        MODEL_TENSOR.CONVNEXT_PW1: (
            "backbone.convnext.{bid}.pwconv1", # wavtokenizer
        ),

        MODEL_TENSOR.CONVNEXT_PW2: (
            "backbone.convnext.{bid}.pwconv2", # wavtokenizer
        ),

        MODEL_TENSOR.CONVNEXT_GAMMA: (
            "backbone.convnext.{bid}.gamma", # wavtokenizer
        ),

        MODEL_TENSOR.POSNET_CONV1: (
            "backbone.posnet.{bid}.conv1", # wavtokenizer
        ),

        MODEL_TENSOR.POSNET_CONV2: (
            "backbone.posnet.{bid}.conv2", # wavtokenizer
        ),

        MODEL_TENSOR.POSNET_NORM: (
            "backbone.posnet.{bid}.norm", # wavtokenizer
        ),

        MODEL_TENSOR.POSNET_NORM1: (
            "backbone.posnet.{bid}.norm1", # wavtokenizer
        ),

        MODEL_TENSOR.POSNET_NORM2: (
            "backbone.posnet.{bid}.norm2", # wavtokenizer
        ),

        MODEL_TENSOR.POSNET_ATTN_NORM: (
            "backbone.posnet.{bid}.norm", # wavtokenizer
        ),

        MODEL_TENSOR.POSNET_ATTN_Q: (
            "backbone.posnet.{bid}.q", # wavtokenizer
        ),

        MODEL_TENSOR.POSNET_ATTN_K: (
            "backbone.posnet.{bid}.k", # wavtokenizer
        ),

        MODEL_TENSOR.POSNET_ATTN_V: (
            "backbone.posnet.{bid}.v", # wavtokenizer
        ),

        MODEL_TENSOR.POSNET_ATTN_OUT: (
            "backbone.posnet.{bid}.proj_out", # wavtokenizer
        ),
    }

    # architecture-specific block mappings

@@ -122,8 +122,30 @@ class SpecialVocab:
                tokenizer = json.load(f)
            if self.load_merges:
                merges = tokenizer.get('model', {}).get('merges')
                if isinstance(merges, list) and merges and isinstance(merges[0], str):
                    self.merges = merges
                if isinstance(merges, list) and merges:
                    if isinstance(merges[0], str):
                        self.merges = merges
                    elif isinstance(merges[0], list) and len(merges[0]) == 2 and isinstance(merges[0][0], str):
                        # New format since transformers 4.45 to support spaces in merges
                        # ref: https://github.com/ggerganov/llama.cpp/issues/9692
                        # TODO: internally store as the new format instead of converting to old
                        if any(' ' in s for pair in merges for s in pair):
                            logger.warning(f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}')
                        self.merges = [
                            ' '.join(
                                [
                                    # ensure the spaces are properly encoded
                                    ''.join(
                                        chr(ord(c) + 256) if c == ' ' else c
                                        for c in part
                                    )
                                    for part in pair
                                ]
                            )
                            for pair in merges
                        ]
                    else:
                        raise ValueError("Unknown tokenizer merges format")
            added_tokens = tokenizer.get('added_tokens', {})
        else:
            added_tokens = {}
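
The conversion relies on the GPT-2 byte-to-unicode convention, where a literal space maps to `chr(ord(' ') + 256)`, i.e. `'Ġ'` (U+0120), so new-format pairs collapse losslessly into the old space-separated strings:

```python
# A new-format merge pair containing a space, flattened to the old format.
pair = ["a b", "c"]
old_form = ' '.join(
    ''.join(chr(ord(ch) + 256) if ch == ' ' else ch for ch in part)
    for part in pair
)
print(old_form)  # 'aĠb c'
```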
 
@@ -1,12 +1,11 @@
[tool.poetry]
name = "gguf"
version = "0.10.0"
version = "0.15.0"
description = "Read and write ML models in GGUF for GGML"
authors = ["GGML <ggml@ggml.ai>"]
packages = [
    {include = "gguf"},
    {include = "gguf/py.typed"},
    {include = "scripts"},
]
readme = "README.md"
homepage = "https://ggml.ai"
@@ -33,7 +32,7 @@ requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
gguf-convert-endian = "scripts:gguf_convert_endian_entrypoint"
gguf-dump = "scripts:gguf_dump_entrypoint"
gguf-set-metadata = "scripts:gguf_set_metadata_entrypoint"
gguf-new-metadata = "scripts:gguf_new_metadata_entrypoint"
gguf-convert-endian = "gguf.scripts:gguf_convert_endian_entrypoint"
gguf-dump = "gguf.scripts:gguf_dump_entrypoint"
gguf-set-metadata = "gguf.scripts:gguf_set_metadata_entrypoint"
gguf-new-metadata = "gguf.scripts:gguf_new_metadata_entrypoint"
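
The installed CLI names (`gguf-dump`, `gguf-set-metadata`, ...) are unchanged; only their targets moved into the package namespace. A quick import check against an editable install:

```python
# These imports mirror what the console scripts invoke after the move.
from gguf.scripts import (
    gguf_convert_endian_entrypoint,
    gguf_dump_entrypoint,
    gguf_new_metadata_entrypoint,
    gguf_set_metadata_entrypoint,
)
```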
 
@@ -182,8 +182,43 @@ class TestMetadataMethod(unittest.TestCase):
        expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'version': '14-v0', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'version': 'v1', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}]
        expect.tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl']
        expect.languages=['en']
        expect.datasets=['teknium/OpenHermes-2.5']
        expect.datasets=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]
        self.assertEqual(got, expect)

        # Base Model spec is inferred from model id
        model_card = {'base_models': 'teknium/OpenHermes-2.5'}
        expect = gguf.Metadata(base_models=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}])
        got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None)
        self.assertEqual(got, expect)

        # Base Model spec is only url
        model_card = {'base_models': ['https://huggingface.co/teknium/OpenHermes-2.5']}
        expect = gguf.Metadata(base_models=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}])
        got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None)
        self.assertEqual(got, expect)

        # Base Model spec is given directly
        model_card = {'base_models': [{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]}
        expect = gguf.Metadata(base_models=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}])
        got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None)
        self.assertEqual(got, expect)

        # Dataset spec is inferred from model id
        model_card = {'datasets': 'teknium/OpenHermes-2.5'}
        expect = gguf.Metadata(datasets=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}])
        got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None)
        self.assertEqual(got, expect)

        # Dataset spec is only url
        model_card = {'datasets': ['https://huggingface.co/teknium/OpenHermes-2.5']}
        expect = gguf.Metadata(datasets=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}])
        got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None)
        self.assertEqual(got, expect)

        # Dataset spec is given directly
        model_card = {'datasets': [{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]}
        expect = gguf.Metadata(datasets=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}])
        got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None)
        self.assertEqual(got, expect)

    def test_apply_metadata_heuristic_from_hf_parameters(self):
 
@@ -136,7 +136,7 @@ def compare_tensors(t1: np.ndarray, t2: np.ndarray, qtype: GGMLQuantizationType)
        logger.debug(f"Sample bad block ({diff_bits[bad_block_id]} differing bits):\n{t1[bad_block_id]}\nReference:\n{t2[bad_block_id]}")

        sum_diff_bits = np.sum(diff_bits)
        logger.debug(f"{sum_diff_bits} bits differ ({100 * sum_diff_bits/(x.size * 8):.6f}%)")
        logger.debug(f"{sum_diff_bits} bits differ ({100 * sum_diff_bits / (x.size * 8):.6f}%)")
        return False
 