From 9d3ef4809fc0039ee80acc92cd0cb854b544dffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sat, 15 Nov 2025 14:06:24 +0100 Subject: [PATCH] convert : set expert gating func in base class (#17279) --- convert_hf_to_gguf.py | 43 +++++++++---------------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 2b08013e1e..43f74cfb14 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -825,6 +825,15 @@ class TextModel(ModelBase): self.gguf_writer.add_expert_group_used_count(n_group_used) logger.info(f"gguf: expert groups used count = {n_group_used}") + if (score_func := self.find_hparam(["score_function", "scoring_func", "score_func"], optional=True)) is not None: + if score_func == "sigmoid": + self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID) + elif score_func == "softmax": + self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX) + else: + raise ValueError(f"Unsupported expert score gating function value: {score_func}") + logger.info(f"gguf: expert score gating function = {score_func}") + if (head_dim := self.hparams.get("head_dim")) is not None: self.gguf_writer.add_key_length(head_dim) self.gguf_writer.add_value_length(head_dim) @@ -2553,15 +2562,6 @@ class AfmoeModel(LlamaModel): if (n_dense_layers := self.hparams.get("num_dense_layers")) is not None: self.gguf_writer.add_leading_dense_block_count(n_dense_layers) - # Expert Gating Function - score_func = self.hparams.get("score_func") - if score_func == "sigmoid": - self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID) - elif score_func == "softmax": - self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX) - elif score_func is not None: - raise ValueError(f"Unsupported score_function value: {score_func}") - # Route normalization and scaling if (route_norm := self.hparams.get("route_norm")) is not None: self.gguf_writer.add_expert_weights_norm(route_norm) @@ -7182,13 +7182,6 @@ class DeepseekV2Model(TextModel): self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"]) self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"]) - if hparams["scoring_func"] == "sigmoid": - self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID) - elif hparams["scoring_func"] == "softmax": - self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX) - else: - raise ValueError(f"Unsupported scoring_func value: {hparams['scoring_func']}") - self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"]) rope_scaling = self.hparams.get("rope_scaling") or {} @@ -7294,12 +7287,6 @@ class MiniMaxM2Model(TextModel): def set_gguf_parameters(self): super().set_gguf_parameters() - if self.hparams["scoring_func"] == "sigmoid": - self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID) - elif self.hparams["scoring_func"] == "softmax": - self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX) - else: - raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}") self.gguf_writer.add_expert_feed_forward_length(self.find_hparam(["intermediate_size"])) self.gguf_writer.add_rope_dimension_count(self.find_hparam(["rotary_dim"])) @@ -7392,11 +7379,6 @@ class Dots1Model(Qwen2MoeModel): self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"]) self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"]) - if self.hparams["scoring_func"] == "noaux_tc": - self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID) - else: - raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}") - def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None): if name.endswith("e_score_correction_bias"): name = name.replace("e_score_correction_bias", "e_score_correction.bias") @@ -8717,13 +8699,6 @@ class BailingMoeV2Model(TextModel): self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"]) self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"]) - if hparams["score_function"] == "sigmoid": - self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID) - elif hparams["score_function"] == "softmax": - self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX) - else: - raise ValueError(f"Unsupported score_function value: {hparams['score_function']}") - if (nextn_layers := self.hparams.get("num_nextn_predict_layers")) is not None: self.gguf_writer.add_nextn_predict_layers(nextn_layers)