flake8 fixes

Author: ibrahimkhadraoui
Date: 2025-07-08 11:45:58 +04:00
Parent: adff470c8a
Commit: 9a048d8de9

2 changed files with 9 additions and 19 deletions

convert_hf_to_gguf.py

@@ -686,14 +686,6 @@ class TextModel(ModelBase):
         if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
             # ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
             res = "falcon3"
-        if (
-            chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86" or
-            chkhsh == "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896" or
-            chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b" or
-            chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"
-        ):
-            # ref: https://huggingface.co/collections/tiiuae/falcon-h1-6819f2795bc406da60fab8df
-            res = "falcon_h1"
         if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
             # ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
             res = "bert-bge-large"
@@ -6608,7 +6600,7 @@ class FalconH1Model(Mamba2Model):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         tensors = list(super().modify_tensors(data_torch, name, bid))
-        tensor = tensors[0][1]
+        tensor = tensors[0][1]
         if "down_proj" in name:
             tensor = tensor * self.mlp_multipliers[1]
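For context, modify_tensors() here rescales selected tensors as they pass through conversion. A minimal self-contained sketch of that idiom, with placeholder multiplier values (Falcon-H1 reads its real ones from the model config):

    import torch

    MLP_MULTIPLIERS = (1.0, 0.5)  # hypothetical (up_proj, down_proj) scales

    def modify_tensors(name: str, data_torch: torch.Tensor) -> list[tuple[str, torch.Tensor]]:
        tensors = [(name, data_torch)]  # stand-in for super().modify_tensors(...)
        tensor = tensors[0][1]
        if "down_proj" in name:
            tensor = tensor * MLP_MULTIPLIERS[1]
        return [(tensors[0][0], tensor)]

    name = "model.layers.0.feed_forward.down_proj.weight"
    (out_name, out), = modify_tensors(name, torch.ones(2, 2))
    assert torch.allclose(out, torch.full((2, 2), 0.5))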
@@ -6669,9 +6661,7 @@ class FalconH1Model(Mamba2Model):
         ## Validation ##
         assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported"
         assert self.d_inner % d_head == 0, f"SSM inner size {self.d_inner} not a multiple of head dim {d_head}"
-        self.gguf_writer.add_head_count_kv(self.find_hparam(["num_key_value_heads"], optional=True) or
-                                           self.find_hparam(["num_attention_heads"]))
+        self.gguf_writer.add_head_count_kv(self.find_hparam(["num_key_value_heads"], optional=True) or self.find_hparam(["num_attention_heads"]))

         # Add any other Falcon Mamba2 specific configuration
         self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
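The collapsed call above keeps the usual grouped-query-attention fallback: use num_key_value_heads when the checkpoint defines it, otherwise fall back to num_attention_heads (plain multi-head attention). A tiny sketch of the same logic against a plain dict, which stands in for find_hparam():

    def head_count_kv(hparams: dict) -> int:
        # GQA checkpoints set num_key_value_heads; pure MHA ones may omit it,
        # in which case every attention head has its own K/V head.
        return hparams.get("num_key_value_heads") or hparams["num_attention_heads"]

    assert head_count_kv({"num_attention_heads": 32}) == 32
    assert head_count_kv({"num_attention_heads": 32, "num_key_value_heads": 8}) == 8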

gguf-py/gguf/tensor_mapping.py

@@ -293,7 +293,7 @@ class TensorNameMap:
         MODEL_TENSOR.FFN_POST_NORM: (
             "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
             "model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
-            "model.layers.{bid}.feed_forward.up_proj",
+            "model.layers.{bid}.feed_forward.up_proj",
         ),

         MODEL_TENSOR.FFN_GATE_INP: (
@@ -364,7 +364,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
             "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
-            "model.layers.{bid}.feed_forward.down_proj",
+            "model.layers.{bid}.feed_forward.down_proj",
         ),

         # AWQ-activation gate
@@ -550,13 +550,13 @@ class TensorNameMap:
         MODEL_TENSOR.SSM_IN: (
             "model.layers.{bid}.in_proj",
             "backbone.layers.{bid}.mixer.in_proj",
-            "model.layers.{bid}.mamba.in_proj",
+            "model.layers.{bid}.mamba.in_proj",
         ),

         MODEL_TENSOR.SSM_CONV1D: (
             "model.layers.{bid}.conv1d",
             "backbone.layers.{bid}.mixer.conv1d",
-            "model.layers.{bid}.mamba.conv1d",
+            "model.layers.{bid}.mamba.conv1d",
         ),

         MODEL_TENSOR.SSM_X: (
@@ -567,13 +567,13 @@ class TensorNameMap:
         MODEL_TENSOR.SSM_DT: (
             "model.layers.{bid}.dt_proj",
             "backbone.layers.{bid}.mixer.dt_proj",
-            "model.layers.{bid}.mamba.dt_proj",
+            "model.layers.{bid}.mamba.dt_proj",
         ),

         MODEL_TENSOR.SSM_A: (
             "model.layers.{bid}.A_log",
             "backbone.layers.{bid}.mixer.A_log",
-            "model.layers.{bid}.mamba.A_log",
+            "model.layers.{bid}.mamba.A_log",
         ),

         MODEL_TENSOR.SSM_D: (
@@ -1177,7 +1177,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_RESMPL_ATTN_OUT: (
             "resampler.attn.out_proj",
         ),

         MODEL_TENSOR.V_RESMPL_KV: (
-            "resampler.kv_proj",
+            "resampler.kv_proj",
         ),
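For orientation, entries like the ones in this file map per-layer Hugging Face tensor names onto GGUF tensor keys by substituting the block index for {bid}. A simplified sketch of that lookup; the table and helper below are illustrative, and the real TensorNameMap precomputes a reverse dictionary and also handles weight/bias suffixes:

    # One mapping entry, in the same shape as the tables above.
    MAPPING = {
        "SSM_IN": (
            "model.layers.{bid}.in_proj",
            "backbone.layers.{bid}.mixer.in_proj",
            "model.layers.{bid}.mamba.in_proj",
        ),
    }

    def find_gguf_key(hf_name: str, n_blocks: int) -> str | None:
        # Linear scan for clarity; substitute each block index into every
        # pattern and compare against the incoming Hugging Face name.
        for key, patterns in MAPPING.items():
            for pattern in patterns:
                for bid in range(n_blocks):
                    if pattern.format(bid=bid) == hf_name:
                        return f"{key}.{bid}"
        return None

    assert find_gguf_key("model.layers.3.mamba.in_proj", 8) == "SSM_IN.3"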