Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
	model : add support for Falcon-H1 family (#14534)
* v1
* push more fixes
* another fix
* fix
* more fixes
* minor fix
* more cleaning on python code
* python fixes
* changed precision for multipliers float 32->64
* fixes
* another fix
* fix
* pre-norm -> norm
* fix
* Revert "fix"
  This reverts commit 243e4d1a50.
* fix
* small fix ffn_norm
* try
* mix instead of max
* fix vocab size
* conflict solve
* fixed multipliers
* falcon-h1 specific vocab resolved
* read arch from gguf.MODEL_ARCH
* mamba_d_ssm added to d_inner find_hparam
* remove unused functions from gguf_writer.py
* override modify_tensors instead of get_tensors
* fix conversion and d_inner
* added some cb functions for debugging purposes
* inp_out_ids moved outside of layers loop
* mup_vec created as float64
* fix rope_theta
* injected mup
* cleanups
* rm extra space
* rm unused MAMBA_CHUNK_SIZE
* rm unused key
* add bos False
* changed ROPE_TYPE
* cleaning debugging stuff
* cleaning debug quant
* fix comment
* some cleanups
* some cleanups
* Update src/llama-model-loader.cpp
* more cleanups
* moe cleanups
* d_ssm -> d_inner
* cleaning unused hparams
* cleanup
* more cleanups
* more cleanups on python conversion
* minor cleanups
* Apply suggestions from code review
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* remove todo
* added falcon-h1
* tensor not required
* clean
* remove unneeded attributes
* more cleanups and fixed conversion
* remove final_norm
* flake8 fixes
* Update src/llama-model.cpp
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* flake8 fixes
* Update src/llama-hparams.cpp
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update src/llama-model.cpp
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update src/llama-model.cpp
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update src/llama-arch.cpp
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update convert_hf_to_gguf.py
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* added hashes
* Update src/llama-arch.cpp
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update src/llama-vocab.cpp
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* update the update file
* Revert "update the update file"
  This reverts commit 082ab4ad2a.
* fix: address suggestions
* fix: update convert_hf_to_gguf.py
* Update gguf-py/gguf/constants.py
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update src/llama-model-loader.cpp
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* d_inner fixed
* Update src/llama-model.cpp
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* reshaping ssm_norm for 34B
* removing generate_mup
* remove duplicate metadata keys
* rm comment
* final comment
* fix unused args
* fix constants
* fix bad merge
* Update src/llama-model.cpp
  Co-authored-by: compilade <git@compilade.net>
* falcon-h1: remove unused ssm_in_b and bad merge
* Update src/llama-model.cpp
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* falcon-h1: fix last comment
* Update convert_hf_to_gguf.py
  Co-authored-by: compilade <git@compilade.net>
* falcon-h1: revert add_add_bos(False)
* falcon-h1: fix tied weights
* falcon-h1: remove whitespace
* falcon-h1: fix wrong size param
* falcon-h1: fix whitespace issues

---------

Co-authored-by: younesbelkada <younes.belkada@tii.ae>
Co-authored-by: Younes B <49240599+younesbelkada@users.noreply.github.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Co-authored-by: compilade <git@compilade.net>
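One recurring theme in the log above is numeric precision: the per-layer multipliers (mup_vec) were switched from float32 to float64 during conversion. A minimal, self-contained sketch of why a long chain of multiplier products drifts in 32-bit; the values are hypothetical and this mirrors only the motivation, not the actual conversion code:

#include <cstdio>

int main() {
    // Hypothetical multiplier chain: the 32-bit literal already rounds,
    // and repeated multiplication compounds the error, while float64
    // keeps the product close to the intended value.
    float  f = 1.0f;
    double d = 1.0;
    for (int i = 0; i < 10000000; ++i) {
        f *= 1.0000001f;
        d *= 1.0000001;
    }
    std::printf("float32 product: %.7g\nfloat64 product: %.7g\n", (double) f, d);
}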
Author: ibrahim khadraoui
Committed by: GitHub
Parent: 20b7bf8a32
Commit: 04655063c4
src/llama-arch.cpp
@@ -46,6 +46,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_STARCODER2,       "starcoder2"       },
     { LLM_ARCH_MAMBA,            "mamba"            },
     { LLM_ARCH_MAMBA2,           "mamba2"           },
+    { LLM_ARCH_FALCON_H1,        "falcon-h1"        },
     { LLM_ARCH_XVERSE,           "xverse"           },
     { LLM_ARCH_COMMAND_R,        "command-r"        },
     { LLM_ARCH_COHERE2,          "cohere2"          },
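The first hunk registers the printable name for the new enum value. For intuition, here is a minimal self-contained sketch of the same table-plus-reverse-lookup pattern; the entries are abbreviated and arch_from_string is a hypothetical stand-in, not the resolver llama.cpp actually uses:

#include <cstdio>
#include <cstring>
#include <map>

// Abbreviated stand-in for the llm_arch enum and the LLM_ARCH_NAMES map
// in the hunk above; only a few entries are reproduced.
enum llm_arch { LLM_ARCH_MAMBA, LLM_ARCH_MAMBA2, LLM_ARCH_FALCON_H1, LLM_ARCH_UNKNOWN };

static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_MAMBA,     "mamba"     },
    { LLM_ARCH_MAMBA2,    "mamba2"    },
    { LLM_ARCH_FALCON_H1, "falcon-h1" },
};

// Reverse lookup: resolve the enum from the architecture string stored
// in a GGUF file's metadata.
static llm_arch arch_from_string(const char * name) {
    for (const auto & kv : LLM_ARCH_NAMES) {
        if (std::strcmp(kv.second, name) == 0) {
            return kv.first;
        }
    }
    return LLM_ARCH_UNKNOWN;
}

int main() {
    std::printf("'falcon-h1' -> arch enum %d\n", (int) arch_from_string("falcon-h1"));
}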
@@ -1024,6 +1025,30 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_SSM_OUT,         "blk.%d.ssm_out" },
         },
     },
+    {
+        LLM_ARCH_FALCON_H1,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_SSM_IN,          "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D,      "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT,          "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A,           "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D,           "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_NORM,        "blk.%d.ssm_norm" },
+            { LLM_TENSOR_SSM_OUT,         "blk.%d.ssm_out" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_XVERSE,
         {
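Every per-layer entry in this map is a printf-style pattern in which %d stands for the block (layer) index. A small illustrative sketch of how such a pattern expands into concrete GGUF tensor names; the loop itself is hypothetical, not llama.cpp code:

#include <cstdio>

int main() {
    // Expand the "blk.%d.ssm_in" pattern from the map above for the
    // first three layers: blk.0.ssm_in, blk.1.ssm_in, blk.2.ssm_in.
    char name[64];
    for (int il = 0; il < 3; ++il) {
        std::snprintf(name, sizeof(name), "blk.%d.ssm_in", il);
        std::printf("%s\n", name);
    }
}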
@@ -1967,9 +1992,10 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
 }
 
 bool llm_arch_is_hybrid(const llm_arch & arch) {
-    // TODO: There are currently no hybrid models! Once there are, this will be
-    //  the place to identify them
+    // List all mamba-attention hybrid models here
     switch (arch) {
+        case LLM_ARCH_FALCON_H1:
+            return true;
         default:
             return false;
     }
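The last hunk replaces the placeholder TODO: Falcon-H1 interleaves attention and mamba (SSM) blocks, so it becomes the first architecture for which the hybrid predicate returns true. A self-contained sketch of the pattern with a hypothetical caller; the branch shown is illustrative, and the real cache-allocation logic lives elsewhere in llama.cpp:

#include <cstdio>

// Abbreviated enum; the predicate mirrors the switch in the hunk above.
enum llm_arch { LLM_ARCH_LLAMA, LLM_ARCH_MAMBA, LLM_ARCH_FALCON_H1 };

static bool llm_arch_is_hybrid(llm_arch arch) {
    // List all mamba-attention hybrid models here
    switch (arch) {
        case LLM_ARCH_FALCON_H1:
            return true;
        default:
            return false;
    }
}

int main() {
    // A hypothetical caller might branch on this to size both an attention
    // KV cache and a recurrent SSM state for the same model.
    std::printf("falcon-h1 hybrid: %s\n", llm_arch_is_hybrid(LLM_ARCH_FALCON_H1) ? "yes" : "no");
    std::printf("llama     hybrid: %s\n", llm_arch_is_hybrid(LLM_ARCH_LLAMA) ? "yes" : "no");
}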