Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
			
		
		
		
	fix convert.py for codellama, add llama 34B to the list of recognized models (#2768)
This commit is contained in:
		| @@ -191,7 +191,7 @@ class Params: | |||||||
|     def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params': |     def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params': | ||||||
|         config = json.load(open(config_path)) |         config = json.load(open(config_path)) | ||||||
|  |  | ||||||
|         n_vocab    = config["vocab_size"] |         n_vocab    = config["vocab_size"] if "vocab_size" in config else -1 | ||||||
|         n_embd     = config["dim"] |         n_embd     = config["dim"] | ||||||
|         n_layer    = config["n_layers"] |         n_layer    = config["n_layers"] | ||||||
|         n_mult     = config["multiple_of"] |         n_mult     = config["multiple_of"] | ||||||
|   | |||||||
| @@ -827,6 +827,7 @@ enum e_model { | |||||||
|     MODEL_7B, |     MODEL_7B, | ||||||
|     MODEL_13B, |     MODEL_13B, | ||||||
|     MODEL_30B, |     MODEL_30B, | ||||||
|  |     MODEL_34B, | ||||||
|     MODEL_40B, |     MODEL_40B, | ||||||
|     MODEL_65B, |     MODEL_65B, | ||||||
|     MODEL_70B, |     MODEL_70B, | ||||||
| @@ -1518,6 +1519,7 @@ static const char * llama_model_type_name(e_model type) { | |||||||
|         case MODEL_7B:  return "7B"; |         case MODEL_7B:  return "7B"; | ||||||
|         case MODEL_13B: return "13B"; |         case MODEL_13B: return "13B"; | ||||||
|         case MODEL_30B: return "30B"; |         case MODEL_30B: return "30B"; | ||||||
|  |         case MODEL_34B: return "34B"; | ||||||
|         case MODEL_40B: return "40B"; |         case MODEL_40B: return "40B"; | ||||||
|         case MODEL_65B: return "65B"; |         case MODEL_65B: return "65B"; | ||||||
|         case MODEL_70B: return "70B"; |         case MODEL_70B: return "70B"; | ||||||
| @@ -1590,6 +1592,7 @@ static void llm_load_hparams( | |||||||
|                     case 26: model.type = e_model::MODEL_3B; break; |                     case 26: model.type = e_model::MODEL_3B; break; | ||||||
|                     case 32: model.type = e_model::MODEL_7B; break; |                     case 32: model.type = e_model::MODEL_7B; break; | ||||||
|                     case 40: model.type = e_model::MODEL_13B; break; |                     case 40: model.type = e_model::MODEL_13B; break; | ||||||
|  |                     case 48: model.type = e_model::MODEL_34B; break; | ||||||
|                     case 60: model.type = e_model::MODEL_30B; break; |                     case 60: model.type = e_model::MODEL_30B; break; | ||||||
|                     case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break; |                     case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break; | ||||||
|                     default: model.type = e_model::MODEL_UNKNOWN; |                     default: model.type = e_model::MODEL_UNKNOWN; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 slaren
					slaren