Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
	llama : consistently catch and throw only exceptions deriving from std::exception (#1599)
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
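For reference, a minimal self-contained sketch of the pattern this commit applies throughout llama.cpp: messages that were previously thrown as bare std::string values are wrapped in std::runtime_error, and the catch sites at the public API boundary switch from catching std::string to catching std::exception. The format() helper below is a simplified stand-in for the one in llama.cpp, not its actual implementation.

    // Sketch of the before/after throw and catch pattern.
    #include <cstdarg>
    #include <cstdio>
    #include <stdexcept>
    #include <string>

    static std::string format(const char * fmt, ...) {
        va_list ap;
        va_start(ap, fmt);
        char buf[1024];
        vsnprintf(buf, sizeof(buf), fmt, ap);
        va_end(ap);
        return std::string(buf);
    }

    // Before: throwing a bare std::string, which does NOT derive from std::exception.
    static void load_old(int version) {
        if (version != 3) {
            throw format("unsupported version %d", version);   // throws std::string
        }
    }

    // After: wrapping the message in std::runtime_error so a single
    // catch (const std::exception &) at the API boundary handles everything.
    static void load_new(int version) {
        if (version != 3) {
            throw std::runtime_error(format("unsupported version %d", version));
        }
    }

    int main() {
        // Old style: a thrown std::string would slip past catch (const std::exception &).
        try {
            load_old(2);
        } catch (const std::string & err) {
            fprintf(stderr, "old style, caught std::string: %s\n", err.c_str());
        }
        // New style: one catch clause covers runtime_error, bad_alloc, and friends.
        try {
            load_new(2);
        } catch (const std::exception & err) {
            fprintf(stderr, "new style, caught std::exception: %s\n", err.what());
        }
        return 0;
    }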
Changed file: llama.cpp (59 lines)
--- a/llama.cpp
+++ b/llama.cpp
@@ -289,15 +289,15 @@ template <typename T>
 static T checked_mul(T a, T b) {
     T ret = a * b;
     if (a != 0 && ret / a != b) {
-        throw format("overflow multiplying %llu * %llu",
-                     (unsigned long long) a, (unsigned long long) b);
+        throw std::runtime_error(format("overflow multiplying %llu * %llu",
+                     (unsigned long long) a, (unsigned long long) b));
     }
     return ret;
 }
 
 static size_t checked_div(size_t a, size_t b) {
     if (b == 0 || a % b != 0) {
-        throw format("error dividing %zu / %zu", a, b);
+        throw std::runtime_error(format("error dividing %zu / %zu", a, b));
     }
     return a / b;
 }
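A minimal sketch of why the overflow check in checked_mul works: for unsigned types the product a * b wraps modulo 2^N, so dividing the wrapped result by a no longer gives back b. The snippet below only demonstrates the check, with a placeholder error message rather than the format() call used in llama.cpp.

    #include <cstdint>
    #include <cstdio>
    #include <stdexcept>

    template <typename T>
    static T checked_mul(T a, T b) {
        T ret = a * b;
        if (a != 0 && ret / a != b) {   // wrapped product no longer divides back to b
            throw std::runtime_error("overflow multiplying");
        }
        return ret;
    }

    int main() {
        uint32_t ok = checked_mul<uint32_t>(1000u, 1000u);   // 1,000,000 fits in 32 bits
        printf("ok = %u\n", ok);
        try {
            checked_mul<uint32_t>(1u << 20, 1u << 20);        // 2^40 wraps to 0 in uint32_t
        } catch (const std::runtime_error & err) {
            printf("caught: %s\n", err.what());
        }
        return 0;
    }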
@@ -361,7 +361,7 @@ struct llama_load_tensor {
         const auto & first_shard = shards.at(0);
         for (const auto & shard : shards) {
             if (shard.type != first_shard.type) {
-                throw format("inconsistent tensor shard type in '%s'", name.c_str());
+                throw std::runtime_error(format("inconsistent tensor shard type in '%s'", name.c_str()));
             }
         }
         type = first_shard.type;
@@ -384,8 +384,8 @@ struct llama_load_tensor {
         const auto & first_shard = shards.at(0);
         for (const auto & shard : shards) {
             if (shard.ne != first_shard.ne) {
-                throw format("inconsistent tensor shard shape in '%s': first was %s, other was %s",
-                             name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), llama_format_tensor_shape(shard.ne).c_str());
+                throw std::runtime_error(format("inconsistent tensor shard shape in '%s': first was %s, other was %s",
+                             name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), llama_format_tensor_shape(shard.ne).c_str()));
             }
         }
         ne = first_shard.ne;
@@ -463,8 +463,8 @@ struct llama_file_loader {
                 }
         }
 
-        throw format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
-                     magic, version);
+        throw std::runtime_error(format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
+                     magic, version));
     }
     void read_hparams() {
         hparams.n_vocab = file.read_u32();
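The hunk above is the fall-through case of the loader's magic/version gate: if none of the recognized header combinations matched, the file is rejected. An illustrative sketch of that kind of gate is below; MY_MAGIC and MY_VERSION are made-up placeholder values, not the real GGML/GGJT constants.

    #include <cstdint>
    #include <cstdio>
    #include <stdexcept>

    static void check_header(uint32_t magic, uint32_t version) {
        const uint32_t MY_MAGIC   = 0x12345678;  // placeholder, not the GGML magic
        const uint32_t MY_VERSION = 1;           // placeholder
        if (magic != MY_MAGIC || version != MY_VERSION) {
            char buf[128];
            snprintf(buf, sizeof(buf),
                     "unknown (magic, version) combination: %08x, %08x",
                     magic, version);
            throw std::runtime_error(buf);
        }
    }

    int main() {
        try {
            check_header(0xdeadbeef, 7);   // deliberately wrong header
        } catch (const std::exception & err) {
            fprintf(stderr, "%s\n", err.what());
            return 1;
        }
        return 0;
    }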
@@ -504,7 +504,7 @@ struct llama_file_loader {
             file.read_raw(shard.ne.data(), sizeof(shard.ne[0]) * n_dims);
             std::string name = file.read_string(name_len);
             if (n_dims < 1 || n_dims > 2) {
-                throw format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims);
+                throw std::runtime_error(format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims));
             }
             switch (shard.type) {
                 case GGML_TYPE_F32:
@@ -521,7 +521,7 @@ struct llama_file_loader {
                 case GGML_TYPE_Q6_K:
                     break;
                 default: {
-                    throw format("unrecognized tensor type %u\n", shard.type);
+                    throw std::runtime_error(format("unrecognized tensor type %u\n", shard.type));
                 }
             }
 
@@ -630,7 +630,7 @@ struct llama_model_loader {
             auto * ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
             file_loaders.emplace_back(ith_file);
             if (ith_file->hparams != first_file->hparams) {
-                throw format("llama.cpp: hparams inconsistent between files");
+                throw std::runtime_error(format("llama.cpp: hparams inconsistent between files"));
             }
         }
         if (!llama_mmap::SUPPORTED) {
@@ -660,7 +660,7 @@ struct llama_model_loader {
     uint32_t guess_n_parts() const {
         auto it = tensors_map.name_to_idx.find("tok_embeddings.weight");
         if (it == tensors_map.name_to_idx.end()) {
-            throw std::string("missing tok_embeddings.weight");
+            throw std::runtime_error(std::string("missing tok_embeddings.weight"));
         }
         const llama_load_tensor & lt = tensors_map.tensors.at(it->second);
         return file_loaders.at(0)->hparams.n_embd / lt.shards.at(0).ne.at(0);
@@ -677,12 +677,12 @@ struct llama_model_loader {
     struct ggml_tensor * get_tensor(const std::string & name, const std::vector<uint32_t> & ne, ggml_backend backend) {
         auto it = tensors_map.name_to_idx.find(name);
         if (it == tensors_map.name_to_idx.end()) {
-            throw format("llama.cpp: tensor '%s' is missing from model", name.c_str());
+            throw std::runtime_error(std::runtime_error(format("llama.cpp: tensor '%s' is missing from model", name.c_str())));
         }
         llama_load_tensor & lt = tensors_map.tensors.at(it->second);
         if (lt.ne != ne) {
-            throw format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
-                         name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str());
+            throw std::runtime_error(format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
+                         name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str()));
         }
 
         return get_tensor_for(lt, backend);
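get_tensor above validates the requested shape against the shape read from the file and renders both in the error message via llama_format_tensor_shape. A hedged sketch of that kind of check follows; format_shape() is only a guess at what llama_format_tensor_shape produces, not its actual implementation.

    #include <cstdint>
    #include <cstdio>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Guess at a shape formatter: renders {4096, 32000} as "[4096, 32000]".
    static std::string format_shape(const std::vector<uint32_t> & ne) {
        std::string s = "[";
        for (size_t i = 0; i < ne.size(); i++) {
            if (i > 0) s += ", ";
            s += std::to_string(ne[i]);
        }
        return s + "]";
    }

    static void check_shape(const std::string & name,
                            const std::vector<uint32_t> & expected,
                            const std::vector<uint32_t> & actual) {
        if (expected != actual) {
            throw std::runtime_error(
                "tensor '" + name + "' has wrong shape; expected " +
                format_shape(expected) + ", got " + format_shape(actual));
        }
    }

    int main() {
        try {
            check_shape("tok_embeddings.weight", {4096, 32000}, {4096, 32001});
        } catch (const std::exception & err) {
            fprintf(stderr, "%s\n", err.what());
        }
        return 0;
    }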
@@ -706,7 +706,7 @@ struct llama_model_loader {
 
     void done_getting_tensors() const {
         if (num_ggml_tensors_created != tensors_map.tensors.size()) {
-            throw std::string("llama.cpp: file contained more tensors than expected");
+            throw std::runtime_error(std::string("llama.cpp: file contained more tensors than expected"));
         }
     }
 
@@ -994,7 +994,7 @@ static void llama_model_load_internal(
         if (hparams.ftype != LLAMA_FTYPE_ALL_F32     &&
             hparams.ftype != LLAMA_FTYPE_MOSTLY_F16  &&
             hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
-            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)");
+            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)"));
         }
     }
 
@@ -1002,7 +1002,7 @@ static void llama_model_load_internal(
         if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
             hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
             hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
-            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)");
+            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)"));
         }
     }
 
@@ -1033,7 +1033,7 @@ static void llama_model_load_internal(
 
         model.ctx = ggml_init(params);
         if (!model.ctx) {
-            throw format("ggml_init() failed");
+            throw std::runtime_error(format("ggml_init() failed"));
         }
     }
 
@@ -1214,8 +1214,8 @@ static bool llama_model_load(
         llama_model_load_internal(fname, lctx, n_ctx, n_gpu_layers, memory_type, use_mmap, use_mlock,
                                   vocab_only, progress_callback, progress_callback_user_data);
         return true;
-    } catch (const std::string & err) {
-        fprintf(stderr, "error loading model: %s\n", err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "error loading model: %s\n", err.what());
         return false;
     }
 }
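This hunk shows the boundary pattern the commit standardizes: internal loader code throws, and the bool-returning wrapper translates any std::exception into a log line and a false return so nothing propagates to callers of the C API. A minimal sketch of that wrapper shape, with illustrative names only:

    #include <cstdio>
    #include <stdexcept>
    #include <string>

    static void model_load_internal(const std::string & fname) {
        if (fname.empty()) {
            throw std::runtime_error("no model file given");
        }
        // ... actual loading would happen here ...
    }

    // Public entry point: never lets an exception escape to (possibly C) callers.
    static bool model_load(const std::string & fname) {
        try {
            model_load_internal(fname);
            return true;
        } catch (const std::exception & err) {
            fprintf(stderr, "error loading model: %s\n", err.what());
            return false;
        }
    }

    int main() {
        return model_load("") ? 0 : 1;
    }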
@@ -2120,8 +2120,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         case LLAMA_FTYPE_MOSTLY_Q5_0: quantized_type = GGML_TYPE_Q5_0; break;
         case LLAMA_FTYPE_MOSTLY_Q5_1: quantized_type = GGML_TYPE_Q5_1; break;
         case LLAMA_FTYPE_MOSTLY_Q8_0: quantized_type = GGML_TYPE_Q8_0; break;
+
         // K-quants
-        case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q2_K:   quantized_type = GGML_TYPE_Q2_K; break;
         case LLAMA_FTYPE_MOSTLY_Q3_K_S:
         case LLAMA_FTYPE_MOSTLY_Q3_K_M:
         case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
@@ -2129,8 +2130,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break;
         case LLAMA_FTYPE_MOSTLY_Q5_K_S:
         case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break;
-        case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break;
-        default: throw format("invalid output file type %d\n", ftype);
+        case LLAMA_FTYPE_MOSTLY_Q6_K:   quantized_type = GGML_TYPE_Q6_K; break;
+        default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
     }
 
     if (nthread <= 0) {
@@ -2231,7 +2232,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                     f32_data[i] = ggml_fp16_to_fp32(f16_data[i]);
                 }
             } else {
-                throw format("type %s unsupported for integer quantization", ggml_type_name(tensor.type));
+                throw std::runtime_error(format("type %s unsupported for integer quantization", ggml_type_name(tensor.type)));
             }
 
             printf("quantizing .. ");
@@ -2433,8 +2434,8 @@ int llama_model_quantize(
     try {
         llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread);
         return 0;
-    } catch (const std::string & err) {
-        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.what());
         return 1;
     }
 }
@@ -2687,8 +2688,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
 int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, const char * path_base_model, int n_threads) {
     try {
         return llama_apply_lora_from_file_internal(ctx, path_lora, path_base_model, n_threads);
-    } catch (const std::string & err) {
-        fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, err.what());
         return 1;
     }
 }
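A side benefit of catching std::exception at these boundaries, sketched below: exceptions raised by the standard library itself, such as std::out_of_range from vector::at (which the loader uses) or std::bad_alloc from operator new, are now reported through the same path instead of escaping past a catch clause that only matched std::string. Illustrative example only.

    #include <cstddef>
    #include <cstdio>
    #include <stdexcept>
    #include <vector>

    static int lookup(const std::vector<int> & v, std::size_t idx) {
        return v.at(idx);   // throws std::out_of_range, which derives from std::exception
    }

    int main() {
        std::vector<int> v = {1, 2, 3};
        try {
            printf("%d\n", lookup(v, 10));
        } catch (const std::exception & err) {
            // A catch (const std::string &) clause would never match here.
            fprintf(stderr, "failed: %s\n", err.what());
            return 1;
        }
        return 0;
    }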