mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	llama : fix platforms without mmap (#4578)
* llama : fix platforms without mmap
* win32 : limit prefetch size to the file size
* fix win32 error clobber, unnecessary std::string in std::runtime_error
This commit is contained in:
		
							
								
								
									
										36
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										36
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -778,7 +778,7 @@ struct llama_file { | ||||
|             throw std::runtime_error(format("read error: %s", strerror(errno))); | ||||
|         } | ||||
|         if (ret != 1) { | ||||
|             throw std::runtime_error(std::string("unexpectedly reached end of file")); | ||||
|             throw std::runtime_error("unexpectedly reached end of file"); | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -931,29 +931,29 @@ struct llama_mmap { | ||||
| #elif defined(_WIN32) | ||||
|     static constexpr bool SUPPORTED = true; | ||||
|  | ||||
|     llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) { | ||||
|         (void) numa; | ||||
|     llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1, bool numa = false) { | ||||
|         GGML_UNUSED(numa); | ||||
|  | ||||
|         size = file->size; | ||||
|  | ||||
|         HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp)); | ||||
|  | ||||
|         HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL); | ||||
|         DWORD error = GetLastError(); | ||||
|  | ||||
|         if (hMapping == NULL) { | ||||
|             DWORD error = GetLastError(); | ||||
|             throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str())); | ||||
|         } | ||||
|  | ||||
|         addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0); | ||||
|         error = GetLastError(); | ||||
|         DWORD error = GetLastError(); | ||||
|         CloseHandle(hMapping); | ||||
|  | ||||
|         if (addr == NULL) { | ||||
|             throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str())); | ||||
|         } | ||||
|  | ||||
|         if (prefetch) { | ||||
|         if (prefetch > 0) { | ||||
|             // PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it | ||||
|             BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG); | ||||
|             HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll"); | ||||
| @@ -965,9 +965,9 @@ struct llama_mmap { | ||||
|                 // advise the kernel to preload the mapped memory | ||||
|                 WIN32_MEMORY_RANGE_ENTRY range; | ||||
|                 range.VirtualAddress = addr; | ||||
|                 range.NumberOfBytes = (SIZE_T)size; | ||||
|                 range.NumberOfBytes = (SIZE_T) std::min(size, prefetch); | ||||
|                 if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) { | ||||
|                     fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n", | ||||
|                     LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n", | ||||
|                             llama_format_win_err(GetLastError()).c_str()); | ||||
|                 } | ||||
|             } | ||||
| @@ -982,26 +982,26 @@ struct llama_mmap { | ||||
|  | ||||
|     ~llama_mmap() { | ||||
|         if (!UnmapViewOfFile(addr)) { | ||||
|             fprintf(stderr, "warning: UnmapViewOfFile failed: %s\n", | ||||
|             LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n", | ||||
|                     llama_format_win_err(GetLastError()).c_str()); | ||||
|         } | ||||
|     } | ||||
| #else | ||||
|     static constexpr bool SUPPORTED = false; | ||||
|  | ||||
|     llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) { | ||||
|         (void) file; | ||||
|         (void) prefetch; | ||||
|         (void) numa; | ||||
|     llama_mmap(struct llama_file * file, size_t prefetch = -1, bool numa = false) { | ||||
|         GGML_UNUSED(file); | ||||
|         GGML_UNUSED(prefetch); | ||||
|         GGML_UNUSED(numa); | ||||
|  | ||||
|         throw std::runtime_error(std::string("mmap not supported")); | ||||
|         throw std::runtime_error("mmap not supported"); | ||||
|     } | ||||
|  | ||||
|     void unmap(size_t offset, size_t len) { | ||||
|         (void) offset; | ||||
|         (void) len; | ||||
|     void unmap_fragment(size_t first, size_t last) { | ||||
|         GGML_UNUSED(first); | ||||
|         GGML_UNUSED(last); | ||||
|  | ||||
|         throw std::runtime_error(std::string("mmap not supported")); | ||||
|         throw std::runtime_error("mmap not supported"); | ||||
|     } | ||||
| #endif | ||||
| }; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 slaren
					slaren