Mirror of https://github.com/ggml-org/llama.cpp.git
	llama : fix quantize with dl backends (#13539)
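Judging from the diff below, the problem is that with dynamically loaded (dl) backends a tool such as quantize can run without a CPU backend registered, so the model loader's unconditional CPU-backend lookup failed and loading threw "no CPU backend found". The fix treats the NUMA probe as best-effort: if no CPU device is found, or the backend does not export ggml_backend_cpu_is_numa, the mapping simply defaults to non-NUMA.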
		| @@ -822,13 +822,18 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps | ||||
|         mappings.reserve(files.size()); | ||||
|         mmaps_used.reserve(files.size()); | ||||
|         for (const auto & file : files) { | ||||
|             auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU)); | ||||
|             if (!reg) { | ||||
|                 throw std::runtime_error(format("%s: no CPU backend found", __func__)); | ||||
|             bool is_numa = false; | ||||
|  | ||||
|             auto * dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); | ||||
|             if (dev) { | ||||
|                 auto * reg = ggml_backend_dev_backend_reg(dev); | ||||
|                 auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa"); | ||||
|                 if (is_numa_fn) { | ||||
|                     is_numa = is_numa_fn(); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa"); | ||||
|             std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn()); | ||||
|             std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa); | ||||
|             mmaps_used.emplace_back(mapping->size(), 0); | ||||
|             if (mlock_mmaps) { | ||||
|                 std::unique_ptr<llama_mlock> mlock_mmap(new llama_mlock()); | ||||
|   | ||||
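For context, the added code can be read as a small standalone helper. A minimal sketch follows, assuming the ggml backend registry API from ggml-backend.h; the wrapper name cpu_backend_is_numa and the main() driver are hypothetical, added only for illustration:

#include "ggml-backend.h"

#include <cstdio>

// Hypothetical helper mirroring the fixed init_mappings() logic:
// probe the CPU backend for NUMA support, treating every step as
// optional so the lookup degrades to "not NUMA" instead of throwing
// when the CPU backend is not loaded (e.g. with dynamic backends).
static bool cpu_backend_is_numa() {
    bool is_numa = false;

    ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    if (dev) {
        ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
        // get_proc_address returns null if the symbol is not exported
        auto * is_numa_fn = (bool (*)(void)) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
        if (is_numa_fn) {
            is_numa = is_numa_fn();
        }
    }

    return is_numa;
}

int main() {
    std::printf("CPU backend NUMA: %s\n", cpu_backend_is_numa() ? "yes" : "no");
    return 0;
}

The design point is that ggml_backend_reg_get_proc_address already returns null for unknown symbols, so the missing-device and missing-symbol cases collapse into the same safe default instead of requiring an error path.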
Diego Devesa