mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-18 11:46:58 +00:00
convert : set expert gating func in base class (#17279)
This commit is contained in:
@@ -825,6 +825,15 @@ class TextModel(ModelBase):
|
|||||||
self.gguf_writer.add_expert_group_used_count(n_group_used)
|
self.gguf_writer.add_expert_group_used_count(n_group_used)
|
||||||
logger.info(f"gguf: expert groups used count = {n_group_used}")
|
logger.info(f"gguf: expert groups used count = {n_group_used}")
|
||||||
|
|
||||||
|
if (score_func := self.find_hparam(["score_function", "scoring_func", "score_func"], optional=True)) is not None:
|
||||||
|
if score_func == "sigmoid":
|
||||||
|
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
|
||||||
|
elif score_func == "softmax":
|
||||||
|
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unsupported expert score gating function value: {score_func}")
|
||||||
|
logger.info(f"gguf: expert score gating function = {score_func}")
|
||||||
|
|
||||||
if (head_dim := self.hparams.get("head_dim")) is not None:
|
if (head_dim := self.hparams.get("head_dim")) is not None:
|
||||||
self.gguf_writer.add_key_length(head_dim)
|
self.gguf_writer.add_key_length(head_dim)
|
||||||
self.gguf_writer.add_value_length(head_dim)
|
self.gguf_writer.add_value_length(head_dim)
|
||||||
@@ -2553,15 +2562,6 @@ class AfmoeModel(LlamaModel):
|
|||||||
if (n_dense_layers := self.hparams.get("num_dense_layers")) is not None:
|
if (n_dense_layers := self.hparams.get("num_dense_layers")) is not None:
|
||||||
self.gguf_writer.add_leading_dense_block_count(n_dense_layers)
|
self.gguf_writer.add_leading_dense_block_count(n_dense_layers)
|
||||||
|
|
||||||
# Expert Gating Function
|
|
||||||
score_func = self.hparams.get("score_func")
|
|
||||||
if score_func == "sigmoid":
|
|
||||||
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
|
|
||||||
elif score_func == "softmax":
|
|
||||||
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
|
|
||||||
elif score_func is not None:
|
|
||||||
raise ValueError(f"Unsupported score_function value: {score_func}")
|
|
||||||
|
|
||||||
# Route normalization and scaling
|
# Route normalization and scaling
|
||||||
if (route_norm := self.hparams.get("route_norm")) is not None:
|
if (route_norm := self.hparams.get("route_norm")) is not None:
|
||||||
self.gguf_writer.add_expert_weights_norm(route_norm)
|
self.gguf_writer.add_expert_weights_norm(route_norm)
|
||||||
@@ -7182,13 +7182,6 @@ class DeepseekV2Model(TextModel):
|
|||||||
self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
|
self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
|
||||||
self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
|
self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
|
||||||
|
|
||||||
if hparams["scoring_func"] == "sigmoid":
|
|
||||||
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
|
|
||||||
elif hparams["scoring_func"] == "softmax":
|
|
||||||
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported scoring_func value: {hparams['scoring_func']}")
|
|
||||||
|
|
||||||
self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
|
self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
|
||||||
|
|
||||||
rope_scaling = self.hparams.get("rope_scaling") or {}
|
rope_scaling = self.hparams.get("rope_scaling") or {}
|
||||||
@@ -7294,12 +7287,6 @@ class MiniMaxM2Model(TextModel):
|
|||||||
|
|
||||||
def set_gguf_parameters(self):
|
def set_gguf_parameters(self):
|
||||||
super().set_gguf_parameters()
|
super().set_gguf_parameters()
|
||||||
if self.hparams["scoring_func"] == "sigmoid":
|
|
||||||
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
|
|
||||||
elif self.hparams["scoring_func"] == "softmax":
|
|
||||||
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
|
|
||||||
|
|
||||||
self.gguf_writer.add_expert_feed_forward_length(self.find_hparam(["intermediate_size"]))
|
self.gguf_writer.add_expert_feed_forward_length(self.find_hparam(["intermediate_size"]))
|
||||||
self.gguf_writer.add_rope_dimension_count(self.find_hparam(["rotary_dim"]))
|
self.gguf_writer.add_rope_dimension_count(self.find_hparam(["rotary_dim"]))
|
||||||
@@ -7392,11 +7379,6 @@ class Dots1Model(Qwen2MoeModel):
|
|||||||
self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"])
|
self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"])
|
||||||
self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
|
self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
|
||||||
|
|
||||||
if self.hparams["scoring_func"] == "noaux_tc":
|
|
||||||
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
|
|
||||||
|
|
||||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
|
||||||
if name.endswith("e_score_correction_bias"):
|
if name.endswith("e_score_correction_bias"):
|
||||||
name = name.replace("e_score_correction_bias", "e_score_correction.bias")
|
name = name.replace("e_score_correction_bias", "e_score_correction.bias")
|
||||||
@@ -8717,13 +8699,6 @@ class BailingMoeV2Model(TextModel):
|
|||||||
self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
|
self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
|
||||||
self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
|
self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
|
||||||
|
|
||||||
if hparams["score_function"] == "sigmoid":
|
|
||||||
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
|
|
||||||
elif hparams["score_function"] == "softmax":
|
|
||||||
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported score_function value: {hparams['score_function']}")
|
|
||||||
|
|
||||||
if (nextn_layers := self.hparams.get("num_nextn_predict_layers")) is not None:
|
if (nextn_layers := self.hparams.get("num_nextn_predict_layers")) is not None:
|
||||||
self.gguf_writer.add_nextn_predict_layers(nextn_layers)
|
self.gguf_writer.add_nextn_predict_layers(nextn_layers)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user