mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	Add LLaDA 8b Diffusion model (#14771)
* Add support for Llada-8b: diffusion model
* Add README
* Fix README and convert_hf_to_gguf
* convert_hf_to_gguf.py: address review comments
* Make everything in a single example
* Remove model-specific sampling
* Remove unused argmax
* Remove braced initializers, improve README.md a bit
* Add diffusion specific gguf params in set_vocab, remove setting rope_theta and rms_norm_eps
* Remove adding the mask token
* Move add_add_bos_token to set_vocab
* use add_bool in gguf_writer.py
This commit is contained in:
		| @@ -89,6 +89,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = { | ||||
|     { LLM_ARCH_LFM2,             "lfm2"             }, | ||||
|     { LLM_ARCH_DREAM,            "dream"            }, | ||||
|     { LLM_ARCH_SMALLTHINKER,     "smallthinker"     }, | ||||
|     { LLM_ARCH_LLADA,            "llada"            }, | ||||
|     { LLM_ARCH_UNKNOWN,          "(unknown)"        }, | ||||
| }; | ||||
|  | ||||
| @@ -1972,6 +1973,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N | ||||
|             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" }, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         LLM_ARCH_LLADA, | ||||
|         { | ||||
|             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" }, | ||||
|             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" }, | ||||
|             { LLM_TENSOR_OUTPUT,          "output" }, | ||||
|             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" }, | ||||
|             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" }, | ||||
|             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" }, | ||||
|             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" }, | ||||
|             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" }, | ||||
|             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" }, | ||||
|             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" }, | ||||
|             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" }, | ||||
|             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" }, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         LLM_ARCH_UNKNOWN, | ||||
|         { | ||||
| @@ -2224,6 +2242,7 @@ bool llm_arch_is_hybrid(const llm_arch & arch) { | ||||
| bool llm_arch_is_diffusion(const llm_arch & arch) { | ||||
|     switch (arch) { | ||||
|         case LLM_ARCH_DREAM: | ||||
|         case LLM_ARCH_LLADA: | ||||
|             return true; | ||||
|         default: | ||||
|             return false; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Aman Gupta
					Aman Gupta