Print model version.

Also improve model type printing, and fix indentation of an unrelated switch statement.
Author: comex
Changed file: llama.cpp (69 changed lines)
@@ -356,17 +356,17 @@ struct llama_load_tensor {
         LLAMA_ASSERT(shards.size() <= UINT32_MAX);
         uint32_t n_shards = (uint32_t) shards.size();
         switch (split_type) {
-        case SPLIT_NONE:
-            ne = first_shard.ne;
-            break;
-        case SPLIT_BY_COLUMNS:
-            ne = {checked_mul<uint32_t>(first_shard.ne[0], n_shards),
-                  first_shard.ne[1]};
-            break;
-        case SPLIT_BY_ROWS:
-            ne = {first_shard.ne[0],
-                  checked_mul<uint32_t>(first_shard.ne[1], n_shards)};
-            break;
+            case SPLIT_NONE:
+                ne = first_shard.ne;
+                break;
+            case SPLIT_BY_COLUMNS:
+                ne = {checked_mul<uint32_t>(first_shard.ne[0], n_shards),
+                      first_shard.ne[1]};
+                break;
+            case SPLIT_BY_ROWS:
+                ne = {first_shard.ne[0],
+                      checked_mul<uint32_t>(first_shard.ne[1], n_shards)};
+                break;
         }
     }
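checked_mul<uint32_t> is called in this hunk but defined elsewhere in llama.cpp. As a minimal sketch of what such an overflow-checked multiply looks like (the error-handling strategy here is an assumption, not necessarily the file's actual implementation):

#include <stdexcept>

// Sketch: multiply two unsigned integers, failing loudly on wraparound so a
// corrupt shard count cannot silently produce a bogus tensor shape.
template <typename T>
static T checked_mul(T a, T b) {
    T ret = a * b;
    if (a != 0 && ret / a != b) {
        throw std::runtime_error("checked_mul: integer overflow");
    }
    return ret;
}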
@@ -806,6 +806,25 @@ bool llama_mlock_supported() {
 // model loading
 //
 
+static const char *llama_file_version_name(llama_file_version version) {
+    switch (version) {
+        case LLAMA_FILE_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
+        case LLAMA_FILE_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
+        case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (latest)";
+        default: LLAMA_ASSERT(false);
+    }
+}
+
+static const char *llama_model_type_name(e_model type) {
+    switch (type) {
+        case MODEL_7B: return "7B";
+        case MODEL_13B: return "13B";
+        case MODEL_30B: return "30B";
+        case MODEL_65B: return "65B";
+        default: LLAMA_ASSERT(false);
+    }
+}
+
 static void llama_model_load_internal(
         const std::string & fname,
         llama_context & lctx,
@@ -823,8 +842,9 @@ static void llama_model_load_internal(
 
     lctx.vocab = std::move(ml->file_loaders.at(0)->vocab);
     auto & model = lctx.model;
+    model.hparams = ml->file_loaders.at(0)->hparams;
+    llama_file_version file_version = ml->file_loaders.at(0)->file_version;
     auto & hparams = model.hparams;
-    hparams = ml->file_loaders.at(0)->hparams;
     uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;
 
     {
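As context for the n_ff line carried through this hunk: it sets the feed-forward width to two thirds of 4*n_embd, rounded up to a multiple of n_mult. A quick standalone check using the 7B model's widely known hyperparameters (n_embd = 4096, n_mult = 256; values assumed here for illustration):

#include <cstdint>
#include <cstdio>

int main() {
    uint32_t n_embd = 4096, n_mult = 256;  // assumed LLaMA 7B values
    // 2*(4*4096)/3 = 10922, rounded up to the next multiple of 256:
    uint32_t n_ff = ((2*(4*n_embd)/3 + n_mult - 1)/n_mult)*n_mult;
    std::printf("n_ff = %u\n", n_ff);      // prints n_ff = 11008
    return 0;
}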
@@ -836,18 +856,21 @@ static void llama_model_load_internal(
         }
 
         hparams.n_ctx = n_ctx;
+    }
 
-        fprintf(stderr, "%s: n_vocab = %u\n",  __func__, hparams.n_vocab);
-        fprintf(stderr, "%s: n_ctx   = %u\n",  __func__, hparams.n_ctx);
-        fprintf(stderr, "%s: n_embd  = %u\n",  __func__, hparams.n_embd);
-        fprintf(stderr, "%s: n_mult  = %u\n",  __func__, hparams.n_mult);
-        fprintf(stderr, "%s: n_head  = %u\n",  __func__, hparams.n_head);
-        fprintf(stderr, "%s: n_layer = %u\n",  __func__, hparams.n_layer);
-        fprintf(stderr, "%s: n_rot   = %u\n",  __func__, hparams.n_rot);
-        fprintf(stderr, "%s: f16     = %u\n",  __func__, hparams.f16);
-        fprintf(stderr, "%s: n_ff    = %u\n",  __func__, n_ff);
-        fprintf(stderr, "%s: n_parts = %zu\n", __func__, ml->file_loaders.size());
-        fprintf(stderr, "%s: type    = %u\n",  __func__, model.type);
+    {
+        fprintf(stderr, "%s: format     = %s\n",  __func__, llama_file_version_name(file_version));
+        fprintf(stderr, "%s: n_vocab    = %u\n",  __func__, hparams.n_vocab);
+        fprintf(stderr, "%s: n_ctx      = %u\n",  __func__, hparams.n_ctx);
+        fprintf(stderr, "%s: n_embd     = %u\n",  __func__, hparams.n_embd);
+        fprintf(stderr, "%s: n_mult     = %u\n",  __func__, hparams.n_mult);
+        fprintf(stderr, "%s: n_head     = %u\n",  __func__, hparams.n_head);
+        fprintf(stderr, "%s: n_layer    = %u\n",  __func__, hparams.n_layer);
+        fprintf(stderr, "%s: n_rot      = %u\n",  __func__, hparams.n_rot);
+        fprintf(stderr, "%s: f16        = %u\n",  __func__, hparams.f16);
+        fprintf(stderr, "%s: n_ff       = %u\n",  __func__, n_ff);
+        fprintf(stderr, "%s: n_parts    = %zu\n", __func__, ml->file_loaders.size());
+        fprintf(stderr, "%s: model size = %s\n",  __func__, llama_model_type_name(model.type));
     }
 
     if (vocab_only) {
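Taken together, the printout now leads with the file format and ends with a human-readable model size instead of the raw "type = %u" integer. Purely for illustration, loading a hypothetical 7B ggjt-v1 file would produce output shaped like this (all values assumed, derived from the format strings above):

llama_model_load_internal: format     = ggjt v1 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: f16        = 2
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 7B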