	Add LLaDA-7b-MoE diffusion model (#16003)
@@ -888,6 +888,9 @@ class TextModel(ModelBase):
         if chkhsh == "a1e163ecab2e718a4c829d1148b6e86824ec36163bb71941c3dca9cd5ac25756":
             # ref: https://huggingface.co/JetBrains/Mellum-4b-base
             res = "mellum"
+        if chkhsh == "9b1be57e70d20d9501b2b3186e792d81181ae36ada3903c26f9fea418cf87206":
+            # ref: https://huggingface.co/inclusionAI/LLaDA-MoE-7B-A1B-Base
+            res = "llada-moe"
 
         if res is None:
             logger.warning("\n")
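For reference, the chkhsh matched above is the pre-tokenizer hash that convert_hf_to_gguf.py computes in get_vocab_base_pre(). A minimal sketch of the derivation, assuming a Hugging Face tokenizer for the model; chktxt stands in for the script's fixed multilingual probe string, which is elided here:

    # Sketch: derive the pre-tokenizer hash used in the lookup above.
    from hashlib import sha256
    from transformers import AutoTokenizer

    # may require trust_remote_code=True depending on the repo
    tokenizer = AutoTokenizer.from_pretrained("inclusionAI/LLaDA-MoE-7B-A1B-Base")
    chktxt = "..."  # stand-in for the script's fixed probe string
    chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
    # expected for this model:
    # "9b1be57e70d20d9501b2b3186e792d81181ae36ada3903c26f9fea418cf87206"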
@@ -8239,6 +8242,76 @@ class HunYuanMoEModel(TextModel):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@ModelBase.register("LLaDAMoEModel", "LLaDAMoEModelLM")
+class LLaDAMoEModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.LLADA_MOE
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if (n_experts := self.hparams.get("num_experts")) is not None:
+            self.gguf_writer.add_expert_count(n_experts)
+
+        if (expert_intermediate_size := self.hparams.get("expert_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(expert_intermediate_size)
+
+        # number of experts used per token (top-k)
+        if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
+            self.gguf_writer.add_expert_used_count(n_experts_used)
+
+        self.gguf_writer.add_mask_token_id(156895)
+        self.gguf_writer.add_causal_attention(False)
+        self.gguf_writer.add_diffusion_shift_logits(False)
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    # Copied from: Qwen2MoeModel
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # process the experts separately
+        if name.find("experts") != -1:
+            n_experts = self.hparams["num_experts"]
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+
+                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+
+                    new_name = self.map_tensor_name(merged_name)
+
+                    tensors.append((new_name, data_torch))
+                return tensors
+            else:
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    # Copied from: Qwen2MoeModel
+    def prepare_tensors(self):
+        super().prepare_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @ModelBase.register("HunYuanDenseV1ForCausalLM")
 class HunYuanModel(TextModel):
     model_arch = gguf.MODEL_ARCH.HUNYUAN_DENSE
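The expert merge in modify_tensors() buffers each layer's per-expert down_proj/gate_proj/up_proj matrices until all num_experts * 3 have arrived, then stacks each projection into a single 3D tensor. A toy sketch of the shape transformation (dimensions are illustrative, not the model's real sizes):

    # Toy sketch: n_experts separate [ff, hidden] matrices become one
    # [n_experts, ff, hidden] tensor, matching what torch.stack(datas, dim=0)
    # produces in the diff above.
    import torch

    n_experts, ff, hidden = 4, 8, 16
    per_expert = [torch.randn(ff, hidden) for _ in range(n_experts)]

    merged = torch.stack(per_expert, dim=0)
    assert merged.shape == (n_experts, ff, hidden)

With the class registered, conversion should follow the usual flow, e.g. python convert_hf_to_gguf.py /path/to/LLaDA-MoE-7B-A1B-Base --outfile llada-moe.gguf (the output path is illustrative).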