	Fixed mmap prefetch for GPU offloading (#2529)
@@ -219,7 +219,7 @@ struct llama_mmap {
         // prefetch/readahead impairs performance on NUMA systems
         if (numa) { prefetch = 0; }
 #ifdef __linux__
-        if (prefetch) { flags |= MAP_POPULATE; }
+        if (prefetch >= file->size) { flags |= MAP_POPULATE; }
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
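With this change, the prefetch parameter is treated as a byte count rather than a flag: MAP_POPULATE, which faults the entire mapping in up front, is only requested when the caller wants at least the whole file resident. Below is a minimal sketch of the resulting mapping path; the map_file wrapper name and the posix_madvise fallback for partial prefetches are assumptions for illustration, only the "prefetch >= file_size" gate comes from the diff above.

// Sketch of the mapping path after this change. map_file and the
// posix_madvise fallback are illustrative assumptions, not part of the diff.
#include <sys/mman.h>
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <stdexcept>

static void * map_file(FILE * fp, size_t file_size, size_t prefetch, bool numa) {
    int fd    = fileno(fp);
    int flags = MAP_SHARED;
    // prefetch/readahead impairs performance on NUMA systems
    if (numa) { prefetch = 0; }
#ifdef __linux__
    // MAP_POPULATE populates the *entire* mapping, so only request it when
    // the caller asked to prefetch at least the whole file.
    if (prefetch >= file_size) { flags |= MAP_POPULATE; }
#endif
    void * addr = mmap(NULL, file_size, PROT_READ, flags, fd, 0);
    if (addr == MAP_FAILED) {
        throw std::runtime_error("mmap failed");
    }
    if (prefetch > 0) {
        // For a partial prefetch, ask the kernel to read ahead only the
        // first `prefetch` bytes instead of populating everything.
        posix_madvise(addr, std::min(file_size, prefetch), POSIX_MADV_WILLNEED);
    }
    return addr;
}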
@@ -747,12 +747,12 @@ struct llama_model_loader {
 
     void load_all_data(llama_progress_callback progress_callback, void *  progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
-        size_t prefetch_size = 0;
+        size_t prefetch_size = file_loader->file.size;
         size_t lock_size = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
             data_size += lt.size;
-            if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
-                prefetch_size += lt.size;
+            if (lt.ggml_tensor->backend != GGML_BACKEND_CPU) {
+                prefetch_size -= lt.size;
             }
         }
 
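The loader previously summed only CPU-resident tensor bytes into prefetch_size; the new accounting starts from the full file size and subtracts offloaded tensor bytes. That way the non-tensor bytes in the file also count toward the prefetch, and prefetch_size equals the file size exactly when nothing is offloaded, which is now the only case that enables MAP_POPULATE in the hunk above. A minimal sketch of that accounting; tensor_info and backend_t are illustrative stand-ins for the llama.cpp loader types, not the real API:

// Minimal sketch of the new prefetch accounting, with hypothetical types.
#include <cstddef>
#include <vector>

enum backend_t { BACKEND_CPU, BACKEND_GPU };

struct tensor_info {
    size_t    size;    // bytes this tensor occupies in the model file
    backend_t backend; // where the tensor will live after loading
};

// Start from the whole file and subtract offloaded tensors: the result
// equals file_size (re-enabling MAP_POPULATE) only when no tensor is
// offloaded, and shrinks the readahead request otherwise.
static size_t compute_prefetch_size(size_t file_size,
                                    const std::vector<tensor_info> & tensors) {
    size_t prefetch_size = file_size;
    for (const tensor_info & t : tensors) {
        if (t.backend != BACKEND_CPU) {
            prefetch_size -= t.size;
        }
    }
    return prefetch_size;
}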