mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	llama : add model types for mixtral (#6589)
This commit is contained in:
		
							
								
								
									
										30
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										30
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -1705,6 +1705,8 @@ enum e_model { | ||||
|     MODEL_MEDIUM, | ||||
|     MODEL_LARGE, | ||||
|     MODEL_XL, | ||||
|     MODEL_8x7B, | ||||
|     MODEL_8x22B, | ||||
| }; | ||||
|  | ||||
| static const size_t kiB = 1024; | ||||
| @@ -3558,6 +3560,8 @@ static const char * llama_model_type_name(e_model type) { | ||||
|         case MODEL_MEDIUM: return "0.4B"; | ||||
|         case MODEL_LARGE:  return "0.8B"; | ||||
|         case MODEL_XL:     return "1.5B"; | ||||
|         case MODEL_8x7B:   return "8x7B"; | ||||
|         case MODEL_8x22B:  return "8x22B"; | ||||
|         default:           return "?B"; | ||||
|     } | ||||
| } | ||||
| @@ -3672,15 +3676,23 @@ static void llm_load_hparams( | ||||
|             { | ||||
|                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); | ||||
|  | ||||
|                 switch (hparams.n_layer) { | ||||
|                     case 22: model.type = e_model::MODEL_1B; break; | ||||
|                     case 26: model.type = e_model::MODEL_3B; break; | ||||
|                     case 32: model.type = e_model::MODEL_7B; break; | ||||
|                     case 40: model.type = e_model::MODEL_13B; break; | ||||
|                     case 48: model.type = e_model::MODEL_34B; break; | ||||
|                     case 60: model.type = e_model::MODEL_30B; break; | ||||
|                     case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break; | ||||
|                     default: model.type = e_model::MODEL_UNKNOWN; | ||||
|                 if (hparams.n_expert == 8) { | ||||
|                     switch (hparams.n_layer) { | ||||
|                         case 32: model.type = e_model::MODEL_8x7B; break; | ||||
|                         case 56: model.type = e_model::MODEL_8x22B; break; | ||||
|                         default: model.type = e_model::MODEL_UNKNOWN; | ||||
|                     } | ||||
|                 } else { | ||||
|                     switch (hparams.n_layer) { | ||||
|                         case 22: model.type = e_model::MODEL_1B; break; | ||||
|                         case 26: model.type = e_model::MODEL_3B; break; | ||||
|                         case 32: model.type = e_model::MODEL_7B; break; | ||||
|                         case 40: model.type = e_model::MODEL_13B; break; | ||||
|                         case 48: model.type = e_model::MODEL_34B; break; | ||||
|                         case 60: model.type = e_model::MODEL_30B; break; | ||||
|                         case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break; | ||||
|                         default: model.type = e_model::MODEL_UNKNOWN; | ||||
|                     } | ||||
|                 } | ||||
|             } break; | ||||
|         case LLM_ARCH_MINICPM: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 slaren
					slaren