mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	llama.cpp : show model size and BPW on load (#3223)
This commit is contained in:
		
							
								
								
									
										12
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -927,6 +927,7 @@ enum e_model { | |||||||
|  |  | ||||||
| static const size_t kB = 1024; | static const size_t kB = 1024; | ||||||
| static const size_t MB = kB*kB; | static const size_t MB = kB*kB; | ||||||
|  | static const size_t GB = kB*kB*kB; | ||||||
|  |  | ||||||
| // default hparams (LLaMA 7B) | // default hparams (LLaMA 7B) | ||||||
| struct llama_hparams { | struct llama_hparams { | ||||||
| @@ -1280,6 +1281,7 @@ struct llama_model_loader { | |||||||
|     int n_created = 0; |     int n_created = 0; | ||||||
|  |  | ||||||
|     int64_t n_elements = 0; |     int64_t n_elements = 0; | ||||||
|  |     size_t  n_bytes    = 0; | ||||||
|  |  | ||||||
|     bool use_mmap = false; |     bool use_mmap = false; | ||||||
|  |  | ||||||
| @@ -1312,6 +1314,7 @@ struct llama_model_loader { | |||||||
|             const char * name = gguf_get_tensor_name(ctx_gguf, i); |             const char * name = gguf_get_tensor_name(ctx_gguf, i); | ||||||
|             struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name); |             struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name); | ||||||
|             n_elements += ggml_nelements(t); |             n_elements += ggml_nelements(t); | ||||||
|  |             n_bytes    += ggml_nbytes(t); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n", |         LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n", | ||||||
| @@ -1909,7 +1912,12 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) { | |||||||
|     LLAMA_LOG_INFO("%s: freq_scale     = %g\n",     __func__, hparams.rope_freq_scale); |     LLAMA_LOG_INFO("%s: freq_scale     = %g\n",     __func__, hparams.rope_freq_scale); | ||||||
|     LLAMA_LOG_INFO("%s: model type     = %s\n",     __func__, llama_model_type_name(model.type)); |     LLAMA_LOG_INFO("%s: model type     = %s\n",     __func__, llama_model_type_name(model.type)); | ||||||
|     LLAMA_LOG_INFO("%s: model ftype    = %s\n",     __func__, llama_model_ftype_name(model.ftype).c_str()); |     LLAMA_LOG_INFO("%s: model ftype    = %s\n",     __func__, llama_model_ftype_name(model.ftype).c_str()); | ||||||
|     LLAMA_LOG_INFO("%s: model size     = %.2f B\n", __func__, ml.n_elements*1e-9); |     LLAMA_LOG_INFO("%s: model params   = %.2f B\n", __func__, ml.n_elements*1e-9); | ||||||
|  |     if (ml.n_bytes < GB) { | ||||||
|  |         LLAMA_LOG_INFO("%s: model size     = %.2f MiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements); | ||||||
|  |     } else { | ||||||
|  |         LLAMA_LOG_INFO("%s: model size     = %.2f GiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     // general kv |     // general kv | ||||||
|     LLAMA_LOG_INFO("%s: general.name   = %s\n",    __func__, model.name.c_str()); |     LLAMA_LOG_INFO("%s: general.name   = %s\n",    __func__, model.name.c_str()); | ||||||
| @@ -3495,7 +3503,7 @@ static struct ggml_cgraph * llm_build_starcoder( | |||||||
|  |  | ||||||
|         ggml_allocr_alloc(lctx.alloc, token); |         ggml_allocr_alloc(lctx.alloc, token); | ||||||
|         if (!ggml_allocr_is_measure(lctx.alloc)) { |         if (!ggml_allocr_is_measure(lctx.alloc)) { | ||||||
|             memcpy(token->data, embd, N * n_embd * ggml_element_size(inpL)); |             memcpy(token->data, embd, N * n_embd * ggml_element_size(token)); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 slaren
					slaren