From 9a048d8de97c5091bd7649c2bf215629864546db Mon Sep 17 00:00:00 2001
From: ibrahimkhadraoui
Date: Tue, 8 Jul 2025 11:45:58 +0400
Subject: [PATCH] flake8 fixes

---
 convert_hf_to_gguf.py          | 14 ++------------
 gguf-py/gguf/tensor_mapping.py | 14 +++++++-------
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 27bdf1fd99..4388f2462b 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -686,14 +686,6 @@ class TextModel(ModelBase):
         if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
             # ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
             res = "falcon3"
-        if (
-            chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86" or
-            chkhsh == "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896" or
-            chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b" or
-            chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"
-        ):
-            # ref: https://huggingface.co/collections/tiiuae/falcon-h1-6819f2795bc406da60fab8df
-            res = "falcon_h1"
         if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
             # ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
             res = "bert-bge-large"
@@ -6608,7 +6600,7 @@ class FalconH1Model(Mamba2Model):
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         tensors = list(super().modify_tensors(data_torch, name, bid))
-        tensor = tensors[0][1] 
+        tensor = tensors[0][1]
 
         if "down_proj" in name:
             tensor = tensor * self.mlp_multipliers[1]
@@ -6669,9 +6661,7 @@ class FalconH1Model(Mamba2Model):
         ## Validation ##
         assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported"
         assert self.d_inner % d_head == 0, f"SSM inner size {self.d_inner} not a multiple of head dim {d_head}"
-        self.gguf_writer.add_head_count_kv(self.find_hparam(["num_key_value_heads"], optional=True) or
-                                           self.find_hparam(["num_attention_heads"]))
-
+        self.gguf_writer.add_head_count_kv(self.find_hparam(["num_key_value_heads"], optional=True) or self.find_hparam(["num_attention_heads"]))
 
         # Add any other Falcon Mamba2 specific configuration
         self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index b8b081a90b..f275e9d481 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -293,7 +293,7 @@ class TensorNameMap:
         MODEL_TENSOR.FFN_POST_NORM: (
             "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
             "model.layers.{bid}.post_mlp_layernorm",         # glm-4-0414
-            "model.layers.{bid}.feed_forward.up_proj", 
+            "model.layers.{bid}.feed_forward.up_proj",
         ),
 
         MODEL_TENSOR.FFN_GATE_INP: (
@@ -364,7 +364,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.shared_expert.up_proj",          # qwen2moe
             "model.layers.{bid}.mlp.shared_experts.up_proj",         # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
-            "model.layers.{bid}.feed_forward.down_proj", 
+            "model.layers.{bid}.feed_forward.down_proj",
         ),
 
         # AWQ-activation gate
@@ -550,13 +550,13 @@ class TensorNameMap:
         MODEL_TENSOR.SSM_IN: (
             "model.layers.{bid}.in_proj",
             "backbone.layers.{bid}.mixer.in_proj",
-            "model.layers.{bid}.mamba.in_proj", 
+            "model.layers.{bid}.mamba.in_proj",
         ),
 
         MODEL_TENSOR.SSM_CONV1D: (
             "model.layers.{bid}.conv1d",
             "backbone.layers.{bid}.mixer.conv1d",
-            "model.layers.{bid}.mamba.conv1d", 
+            "model.layers.{bid}.mamba.conv1d",
         ),
 
         MODEL_TENSOR.SSM_X: (
@@ -567,13 +567,13 @@ class TensorNameMap:
         MODEL_TENSOR.SSM_DT: (
             "model.layers.{bid}.dt_proj",
             "backbone.layers.{bid}.mixer.dt_proj",
-            "model.layers.{bid}.mamba.dt_proj", 
+            "model.layers.{bid}.mamba.dt_proj",
         ),
 
         MODEL_TENSOR.SSM_A: (
             "model.layers.{bid}.A_log",
             "backbone.layers.{bid}.mixer.A_log",
-            "model.layers.{bid}.mamba.A_log", 
+            "model.layers.{bid}.mamba.A_log",
         ),
 
         MODEL_TENSOR.SSM_D: (
@@ -1177,7 +1177,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_RESMPL_ATTN_OUT: (
             "resampler.attn.out_proj",
         ),
- 
+
         MODEL_TENSOR.V_RESMPL_KV: (
             "resampler.kv_proj",
         ),
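A note on the hunks above, since several -/+ pairs render identically: in those pairs the
removed line presumably ends in trailing whitespace (flake8 W291, or W293 for the blank line
in the V_RESMPL_ATTN_OUT hunk), which matches the "flake8 fixes" subject. The one change with
visible semantics, collapsing the add_head_count_kv call onto a single line, keeps the same
fallback behaviour: the `or` falls through to num_attention_heads whenever num_key_value_heads
is absent. A minimal runnable sketch of that pattern, with a hypothetical hparams dict and a
simplified stand-in for find_hparam (not the converter's real implementation):

    # Illustrative stand-in, not the real Falcon-H1 config or ModelBase.find_hparam.
    hparams = {"num_attention_heads": 32}  # no "num_key_value_heads" key present

    def find_hparam(keys, optional=False):
        for key in keys:
            if key in hparams:
                return hparams[key]
        if optional:
            return None  # mirrors the optional=True behaviour used in the patch
        raise KeyError(f"could not find any of: {keys}")

    # Same fallback shape as the patched call:
    head_count_kv = find_hparam(["num_key_value_heads"], optional=True) or find_hparam(["num_attention_heads"])
    print(head_count_kv)  # -> 32 (falls back to the attention head count)

One quirk of the `or` idiom worth noting: a stored value of 0 would also trigger the fallback,
which is harmless here since a zero KV-head count is not a meaningful configuration.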