Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-31 08:51:55 +00:00
	llama : Add support for DeepSeek V3 (#11049)
* convert : extend DEEPSEEK2 model architecture to support DeepseekV3ForCausalLM by adding EXPERT_WEIGHTS_NORM and EXPERT_GATING_FUNC model parameters and FFN_EXP_PROBS_B tensor type

* vocab : add DeepSeek V3 pre-tokenizer regexes

* unicode : handle ACCENT_MARK and SYMBOL categories in regex

* llama : add DeepSeek V3 chat template, handle new model parameters and tensor types

---------

Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>
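DeepSeek V3 changes the MoE router relative to V2: expert logits are scored with a sigmoid rather than a softmax, and the selected top-k weights are re-normalized, which is what the new EXPERT_GATING_FUNC and EXPERT_WEIGHTS_NORM parameters record. Below is a minimal self-contained sketch of that distinction, not the llama.cpp implementation; route_experts and the gating_func_t enum are hypothetical names for illustration.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <numeric>
#include <vector>

enum gating_func_t { GATING_SOFTMAX, GATING_SIGMOID };

// returns per-expert mixing weights; non-selected experts get 0
// (assumes 0 < top_k <= logits.size())
static std::vector<float> route_experts(const std::vector<float> & logits, int top_k,
                                        gating_func_t func, bool norm_weights) {
    const size_t n = logits.size();
    std::vector<float> probs(n);
    if (func == GATING_SOFTMAX) {
        // softmax couples all experts and already sums to 1
        const float max_l = *std::max_element(logits.begin(), logits.end());
        float sum = 0.0f;
        for (size_t i = 0; i < n; ++i) { probs[i] = std::exp(logits[i] - max_l); sum += probs[i]; }
        for (size_t i = 0; i < n; ++i) probs[i] /= sum;
    } else {
        // sigmoid scores each expert independently; the scores do not sum to 1
        for (size_t i = 0; i < n; ++i) probs[i] = 1.0f / (1.0f + std::exp(-logits[i]));
    }

    // keep the top_k highest-scoring experts
    std::vector<size_t> idx(n);
    std::iota(idx.begin(), idx.end(), 0);
    std::partial_sort(idx.begin(), idx.begin() + top_k, idx.end(),
                      [&](size_t a, size_t b) { return probs[a] > probs[b]; });

    std::vector<float> weights(n, 0.0f);
    float sum_k = 0.0f;
    for (int i = 0; i < top_k; ++i) { weights[idx[i]] = probs[idx[i]]; sum_k += probs[idx[i]]; }

    // expert_weights_norm: re-normalize the selected weights to sum to 1 --
    // mostly relevant for sigmoid gating, where the raw scores are unnormalized
    if (norm_weights && sum_k > 0.0f) {
        for (int i = 0; i < top_k; ++i) weights[idx[i]] /= sum_k;
    }
    return weights;
}

int main() {
    const std::vector<float> logits = {1.0f, -0.5f, 2.0f, 0.3f};
    for (float w : route_experts(logits, /*top_k =*/ 2, GATING_SIGMOID, /*norm_weights =*/ true)) {
        printf("%.3f ", w);
    }
    printf("\n");
    return 0;
}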
@@ -66,6 +66,7 @@ const char * llm_type_name(llm_type type) {
         case MODEL_70B:           return "70B";
         case MODEL_236B:          return "236B";
         case MODEL_314B:          return "314B";
+        case MODEL_671B:          return "671B";
         case MODEL_SMALL:         return "0.1B";
         case MODEL_MEDIUM:        return "0.4B";
         case MODEL_LARGE:         return "0.8B";
@@ -125,6 +126,14 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
     }
 }
 
+static const char * llama_expert_gating_func_name(llama_expert_gating_func_type type) {
+    switch (type) {
+        case LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX: return "softmax";
+        case LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID: return "sigmoid";
+        default:                                    return "unknown";
+    }
+}
+
 std::string llama_model_arch_name (const llama_model & model) {
     return llm_arch_name(model.arch);
 }
@@ -933,11 +942,19 @@ void llm_load_hparams(llama_model_loader & ml, llama_model & model) {
                 ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
+                ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);
+                ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
+                if (hparams.expert_gating_func == LLAMA_EXPERT_GATING_FUNC_TYPE_NONE) {
+                    // for compatibility with existing DeepSeek V2 and V2.5 GGUFs
+                    // that have no expert_gating_func model parameter set
+                    hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
+                }
                 ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
 
                 switch (hparams.n_layer) {
                     case 27: model.type = e_model::MODEL_16B; break;
                     case 60: model.type = e_model::MODEL_236B; break;
+                    case 61: model.type = e_model::MODEL_671B; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
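The trailing false arguments mark the two new keys as optional, and the NONE check gives older DeepSeek V2 and V2.5 GGUFs, which predate these keys, the softmax behavior they were converted with. As a quick way to see whether a given GGUF carries the new metadata, here is a hedged standalone sketch against ggml's gguf API; the deepseek2.* key names and the bool/u32 storage types are assumptions inferred from the key constants above, not verified against the converter.

#include <cstdio>
#include "gguf.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
        return 1;
    }

    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
    struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
    if (!ctx) {
        fprintf(stderr, "failed to read %s\n", argv[1]);
        return 1;
    }

    // both keys are optional: older DeepSeek V2/V2.5 GGUFs lack them, which is
    // why the loader above falls back to softmax when the gating func is unset
    const auto id_norm = gguf_find_key(ctx, "deepseek2.expert_weights_norm");  // assumed key name
    const auto id_func = gguf_find_key(ctx, "deepseek2.expert_gating_func");   // assumed key name

    if (id_norm < 0) {
        printf("expert_weights_norm: (not set)\n");
    } else {
        printf("expert_weights_norm: %s\n", gguf_get_val_bool(ctx, id_norm) ? "true" : "false");
    }
    if (id_func < 0) {
        printf("expert_gating_func : (not set -> softmax fallback)\n");
    } else {
        // 1 = softmax, 2 = sigmoid in the llama_expert_gating_func_type enum
        printf("expert_gating_func : %u\n", gguf_get_val_u32(ctx, id_func));
    }

    gguf_free(ctx);
    return 0;
}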
@@ -1259,6 +1276,10 @@ void llm_load_vocab(llama_model_loader & ml, llama_model & model) {
                     tokenizer_pre == "deepseek-coder") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER;
                 vocab.tokenizer_clean_spaces = false;
+            } else if (
+                    tokenizer_pre == "deepseek-v3") {
+                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM;
+                vocab.tokenizer_clean_spaces = false;
             } else if (
                     tokenizer_pre == "falcon") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
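The new deepseek-v3 branch maps the tokenizer.ggml.pre value written by the converter to a dedicated pre-tokenizer type whose split regexes are added elsewhere in this commit. One property assumed here of the DeepSeek V3 pattern is a \p{N}{1,3} term that breaks digit runs into groups of at most three; the toy program below reproduces only that behavior, substituting [0-9] because std::regex has no Unicode property classes.

#include <iostream>
#include <regex>
#include <string>

int main() {
    const std::string text = "In 2025 the price was 1234567 units";
    // stand-in for the assumed \p{N}{1,3} term of the deepseek-v3 pre-tokenizer
    const std::regex  re("[0-9]{1,3}");
    for (auto it = std::sregex_iterator(text.begin(), text.end(), re);
         it != std::sregex_iterator(); ++it) {
        std::cout << it->str() << '\n';  // prints: 202, 5, 123, 456, 7
    }
    return 0;
}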
@@ -1941,6 +1962,8 @@ void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
         LLAMA_LOG_INFO("%s: n_ff_exp             = %d\n",     __func__, hparams.n_ff_exp);
         LLAMA_LOG_INFO("%s: n_expert_shared      = %d\n",     __func__, hparams.n_expert_shared);
         LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n",   __func__, hparams.expert_weights_scale);
+        LLAMA_LOG_INFO("%s: expert_weights_norm  = %d\n",     __func__, hparams.expert_weights_norm);
+        LLAMA_LOG_INFO("%s: expert_gating_func   = %s\n",     __func__, llama_expert_gating_func_name((enum llama_expert_gating_func_type) hparams.expert_gating_func));
         LLAMA_LOG_INFO("%s: rope_yarn_log_mul    = %.4f\n",   __func__, hparams.rope_yarn_log_mul);
     }
 
Author: fairydreaming