Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
	llama : check that all the tensor data is in the model file (#6885)
* llama : check that all the tensor data is in the model file
* also check for unsigned overflow
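The check guards against both a truncated or corrupted file and an unsigned wraparound of the offset arithmetic. A minimal standalone sketch of the same pattern, using made-up offsets and file sizes rather than llama.cpp's actual loader types:

    #include <cstdint>
    #include <cstdio>

    // Returns true if the byte range [offs, offs + nbytes) lies inside a file of file_size bytes.
    // The first comparison catches unsigned wraparound: if offs + nbytes overflows uint64_t,
    // the sum becomes smaller than offs, so the range cannot be valid.
    static bool tensor_in_bounds(uint64_t offs, uint64_t nbytes, uint64_t file_size) {
        return offs + nbytes >= offs && offs + nbytes <= file_size;
    }

    int main() {
        // Truncated file: the tensor claims 4096 bytes at offset 1000000, but the file is only 100000 bytes.
        if (!tensor_in_bounds(1000000, 4096, 100000)) {
            std::puts("tensor data is not within the file bounds, model is corrupted or incomplete");
        }
        // Crafted offset near UINT64_MAX: offs + nbytes wraps around and is rejected as well.
        if (!tensor_in_bounds(UINT64_MAX - 10, 4096, 100000)) {
            std::puts("rejected: offset + size overflows");
        }
        return 0;
    }

In the patch itself the condition lives in the llama_tensor_weight constructor, which is why the llama_file is now opened (files.emplace_back) before the weights index is built: files.back().get() has to point at the file whose size bounds the tensor data.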
1 changed file: llama.cpp (23 lines changed)
@@ -2999,9 +2999,13 @@ struct llama_model_loader {
 
         ggml_tensor * tensor;
 
-        llama_tensor_weight(uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
+        llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
             const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
             offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
+
+            if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) {
+                throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name));
+            }
         }
     };
     std::vector<llama_tensor_weight> weights;
@@ -3040,15 +3044,15 @@ struct llama_model_loader {
         get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
         llm_kv = LLM_KV(llm_arch_from_string(arch_name));
 
+        files.emplace_back(new llama_file(fname.c_str(), "rb"));
+        contexts.emplace_back(ctx);
+
         // Save tensors data offset of the main file.
         // For subsidiary files, `meta` tensor data offset must not be used,
         // so we build a unified tensors index for weights.
         for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-            weights.emplace_back(0, cur->name, meta, cur);
+            weights.emplace_back(files.back().get(), 0, cur->name, meta, cur);
         }
-        files.emplace_back(new llama_file(fname.c_str(), "rb"));
-        contexts.emplace_back(ctx);
-
         uint16_t n_split = 0;
         get_key(llm_kv(LLM_KV_SPLIT_COUNT), n_split, false);
 
@@ -3082,13 +3086,14 @@ struct llama_model_loader {
                     throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, split_path));
                 }
 
-                // Save tensors data offset info of the shard.
-                for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-                    weights.emplace_back(idx, cur->name, ctx_gguf, cur);
-                }
                 files.emplace_back(new llama_file(split_path, "rb"));
                 contexts.emplace_back(ctx);
 
+                // Save tensors data offset info of the shard.
+                for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
+                    weights.emplace_back(files.back().get(), idx, cur->name, ctx_gguf, cur);
+                }
+
                 gguf_free(ctx_gguf);
             }
 
Author: slaren