llama : fix quantize with dl backends (#13539)
@@ -822,13 +822,18 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
         mappings.reserve(files.size());
         mmaps_used.reserve(files.size());
         for (const auto & file : files) {
-            auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
-            if (!reg) {
-                throw std::runtime_error(format("%s: no CPU backend found", __func__));
+            bool is_numa = false;
+
+            auto * dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+            if (dev) {
+                auto * reg = ggml_backend_dev_backend_reg(dev);
+                auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
+                if (is_numa_fn) {
+                    is_numa = is_numa_fn();
+                }
             }
-            auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
-            std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
+
+            std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa);
             mmaps_used.emplace_back(mapping->size(), 0);
             if (mlock_mmaps) {
                 std::unique_ptr<llama_mlock> mlock_mmap(new llama_mlock());
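Why the change: with dynamically loaded (dl) backends, the CPU backend may not be registered at all when quantizing, so the old code's unconditional device/registry lookup and direct `is_numa_fn()` call could throw "no CPU backend found" or invoke a null function pointer. The new code defaults `is_numa` to `false` and only queries the NUMA state when both the CPU backend device and its `ggml_backend_cpu_is_numa` entry point are actually present.

A minimal standalone sketch of the same defensive-lookup pattern (the helper name `cpu_backend_is_numa` is ours for illustration and not part of the codebase; the registry calls are the ggml backend API used in the diff):

#include "ggml-backend.h"
#include "ggml-cpu.h" // declares ggml_is_numa(), used below only for its signature

// Report NUMA only if a CPU backend is registered *and* it exports the
// symbol; otherwise fall back to false instead of throwing.
static bool cpu_backend_is_numa() {
    ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    if (!dev) {
        return false; // e.g. a build where only dl backends are loaded
    }
    ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
    auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
    return is_numa_fn ? is_numa_fn() : false;
}

The proc-address indirection (rather than calling `ggml_is_numa()` directly) is what keeps libllama free of a hard link-time dependency on the CPU backend.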