diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 65017ab705..bdf7363964 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -686,7 +686,12 @@ class TextModel(ModelBase):
         if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
             # ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
             res = "falcon3"
-        if chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86":
+        if (
+            chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86" or
+            chkhsh == "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896" or
+            chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b" or
+            chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"
+        ):
             # ref: https://huggingface.co/collections/tiiuae/falcon-h1-6819f2795bc406da60fab8df
             res = "falcon_h1"
         if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index fe7ba4f9ac..661256df64 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -14802,10 +14802,9 @@ struct llm_build_falcon_h1 : public llm_graph_context {
         const int64_t d_conv = hparams.ssm_d_conv;
         const int64_t d_ssm = hparams.ssm_mamba_d_ssm;
-        const int64_t d_inner = hparams.ssm_d_inner;
         const int64_t d_state = hparams.ssm_d_state;
         const int64_t n_head = hparams.ssm_dt_rank;
-        const int64_t head_dim = hparams.ssm_head_dim == 0 ? d_inner / n_head : hparams.ssm_head_dim;
+        const int64_t head_dim = hparams.ssm_head_dim == 0 ? d_ssm / n_head : hparams.ssm_head_dim;
         const int64_t n_group = hparams.ssm_n_group;
         const int64_t n_seqs = ubatch.n_seqs;