Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
	llama : add llama_vocab, functions -> methods, naming (#11110)
				
					
				
* llama : functions -> methods (#11110)
* llama : add struct llama_vocab to the API (#11156)
ggml-ci
* hparams : move vocab params to llama_vocab (#11159)
ggml-ci
* vocab : more pimpl (#11165)
ggml-ci
* vocab : minor tokenization optimizations (#11160)
ggml-ci
Co-authored-by: Diego Devesa <slarengh@gmail.com>
* lora : update API names (#11167)
ggml-ci
* llama : update API names to use correct prefix (#11174)
* llama : update API names to use correct prefix
ggml-ci
* cont
ggml-ci
* cont
ggml-ci
* minor [no ci]
* vocab : llama_vocab_add_[be]os -> llama_vocab_get_add_[be]os (#11174)
ggml-ci
* vocab : llama_vocab_n_vocab -> llama_vocab_n_tokens (#11174)
ggml-ci
---------
Co-authored-by: Diego Devesa <slarengh@gmail.com>
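The renames above are API-level: vocab state moves behind a struct llama_vocab handle and its getters pick up a consistent llama_vocab_ prefix. Below is a minimal caller-side sketch of what that looks like; it is not part of this commit. The getter names are the ones listed in the commit message, while llama_model_get_vocab is assumed here as the accessor that returns the vocab of a loaded model and is not spelled out in the message. The diff that follows covers only the model-loader side of the change.

#include "llama.h"

#include <cstdint>
#include <cstdio>

static void print_vocab_info(const struct llama_model * model) {
    // accessor assumed for this sketch; returns the model's vocab handle
    const struct llama_vocab * vocab = llama_model_get_vocab(model);

    // renamed from llama_vocab_n_vocab per the commit message
    const int32_t n_tokens = llama_vocab_n_tokens(vocab);

    // renamed from llama_vocab_add_bos / llama_vocab_add_eos per the commit message
    const bool add_bos = llama_vocab_get_add_bos(vocab);
    const bool add_eos = llama_vocab_get_add_eos(vocab);

    printf("n_tokens = %d, add_bos = %d, add_eos = %d\n", n_tokens, add_bos, add_eos);
}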
@@ -7,6 +7,10 @@
 #include <cstring>
 #include <future>
 
+static const size_t kiB = 1024;
+static const size_t MiB = 1024*kiB;
+static const size_t GiB = 1024*MiB;
+
 const char * llama_file_version_name(llama_fver version) {
     switch (version) {
         case GGUF_FILE_VERSION_V1: return "GGUF V1 (support until nov 2023)";
@@ -17,6 +21,49 @@ const char * llama_file_version_name(llama_fver version) {
     return "unknown";
 }
 
+static std::string llama_model_ftype_name(llama_ftype ftype) {
+    if (ftype & LLAMA_FTYPE_GUESSED) {
+        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
+    }
+
+    switch (ftype) {
+        case LLAMA_FTYPE_ALL_F32:         return "all F32";
+        case LLAMA_FTYPE_MOSTLY_F16:      return "F16";
+        case LLAMA_FTYPE_MOSTLY_BF16:     return "BF16";
+        case LLAMA_FTYPE_MOSTLY_Q4_0:     return "Q4_0";
+        case LLAMA_FTYPE_MOSTLY_Q4_1:     return "Q4_1";
+        case LLAMA_FTYPE_MOSTLY_Q5_0:     return "Q5_0";
+        case LLAMA_FTYPE_MOSTLY_Q5_1:     return "Q5_1";
+        case LLAMA_FTYPE_MOSTLY_Q8_0:     return "Q8_0";
+        case LLAMA_FTYPE_MOSTLY_Q2_K:     return "Q2_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q2_K_S:   return "Q2_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_S:   return "Q3_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_M:   return "Q3_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_L:   return "Q3_K - Large";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_S:   return "Q4_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_M:   return "Q4_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_S:   return "Q5_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_M:   return "Q5_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q6_K:     return "Q6_K";
+        case LLAMA_FTYPE_MOSTLY_TQ1_0:    return "TQ1_0 - 1.69 bpw ternary";
+        case LLAMA_FTYPE_MOSTLY_TQ2_0:    return "TQ2_0 - 2.06 bpw ternary";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XXS:  return "IQ2_XXS - 2.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XS:   return "IQ2_XS - 2.3125 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_S:    return "IQ2_S - 2.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_M:    return "IQ2_M - 2.7 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XS:   return "IQ3_XS - 3.3 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXS:  return "IQ3_XXS - 3.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_S:    return "IQ1_S - 1.5625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_M:    return "IQ1_M - 1.75 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_NL:   return "IQ4_NL - 4.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_XS:   return "IQ4_XS - 4.25 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_S:    return "IQ3_S - 3.4375 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_M:    return "IQ3_S mix - 3.66 bpw";
+
+        default: return "unknown, may not work";
+    }
+}
+
 namespace GGUFMeta {
     template <typename T, gguf_type gt_, T (*gfun)(const gguf_context *, const int64_t)>
     struct GKV_Base_Type {
@@ -1009,3 +1056,17 @@ bool llama_model_loader::load_all_data(
 
     return true;
 }
+
+std::string llama_model_loader::ftype_name() const {
+    return llama_model_ftype_name(ftype);
+}
+
+void llama_model_loader::print_info() const {
+    LLAMA_LOG_INFO("%s: file format = %s\n", __func__, llama_file_version_name(fver));
+    LLAMA_LOG_INFO("%s: file type   = %s\n", __func__, llama_model_ftype_name(ftype).c_str());
+    if (n_bytes < GiB) {
+        LLAMA_LOG_INFO("%s: file size   = %.2f MiB (%.2f BPW) \n", __func__, n_bytes/1024.0/1024.0,        n_bytes*8.0/n_elements);
+    } else {
+        LLAMA_LOG_INFO("%s: file size   = %.2f GiB (%.2f BPW) \n", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
+    }
+}
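For reference, the size line emitted by print_info() reports bits per weight (BPW) as total stored bits divided by the number of model weights, and switches from MiB to GiB formatting at the 1 GiB threshold defined by the new constants. A standalone sketch of the same calculation follows; the n_bytes and n_elements values are made up for illustration.

#include <cstddef>
#include <cstdio>

int main() {
    const size_t kiB = 1024;
    const size_t MiB = 1024*kiB;
    const size_t GiB = 1024*MiB;

    const size_t n_bytes    = 4368439296;  // total tensor data in the file (example value)
    const size_t n_elements = 8030261248;  // total number of model weights (example value)

    // bits per weight = total stored bits / number of weights
    const double bpw = n_bytes*8.0/n_elements;

    if (n_bytes < GiB) {
        printf("file size = %.2f MiB (%.2f BPW)\n", n_bytes/1024.0/1024.0, bpw);
    } else {
        printf("file size = %.2f GiB (%.2f BPW)\n", n_bytes/1024.0/1024.0/1024.0, bpw);
    }
    return 0;
}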
	 Georgi Gerganov