mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	Unmap the file in llama_free
This commit is contained in:
		
							
								
								
									
										41
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										41
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -149,6 +149,10 @@ struct llama_model { | ||||
|     // the model memory buffer | ||||
|     std::vector<uint8_t> buf; | ||||
|  | ||||
|     // model memory mapped file | ||||
|     void * mm_addr; | ||||
|     size_t mm_length; | ||||
|  | ||||
|     // tensors | ||||
|     int n_loaded; | ||||
|     std::unordered_map<std::string, struct ggml_tensor *> tensors; | ||||
| @@ -296,22 +300,32 @@ struct llama_context_params llama_context_default_params() { | ||||
| // model loading | ||||
| // | ||||
|  | ||||
| static void * mmap_file(const char* fname) { | ||||
| static void mmap_file(const char* fname, void * &mm_addr, size_t &mm_length) { | ||||
| #if defined(MAP_FAILED) | ||||
|     // POSIX mmap | ||||
|     // POSIX | ||||
|     int fd = open(fname, O_RDONLY); | ||||
|     size_t len = lseek(fd, 0, SEEK_END); | ||||
|     void * mm_addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0); | ||||
|     mm_length = lseek(fd, 0, SEEK_END); | ||||
|     mm_addr = mmap(NULL, mm_length, PROT_READ, MAP_SHARED, fd, 0); | ||||
|     close(fd); | ||||
|     if (mm_addr == MAP_FAILED) { | ||||
|         perror("mmap failed"); | ||||
|         mm_addr = NULL; | ||||
|         mm_length = 0; | ||||
|     } | ||||
|     close(fd); | ||||
|     return mm_addr; | ||||
| #else | ||||
|     // TODO: windows support | ||||
|     (void)(fname); // suppress warnings | ||||
|     return NULL; | ||||
| #endif | ||||
| } | ||||
|  | ||||
| static void munmap_file(void * addr, size_t length) { | ||||
| #if defined(MAP_FAILED) | ||||
|     // POSIX: release the mapping of `length` bytes starting at `addr`; munmap's return value is not checked | ||||
|     munmap(addr, length); | ||||
| #else | ||||
|     // TODO: windows support; until then this is a no-op when MAP_FAILED is not defined | ||||
|     (void)(addr); // suppress unused-parameter warnings | ||||
|     (void)(length); | ||||
| #endif | ||||
| } | ||||
|  | ||||
| @@ -480,12 +494,15 @@ static bool llama_model_load( | ||||
|     bool use_mmap = (n_parts == 1); | ||||
|  | ||||
|     // try to memory map the model file | ||||
|     void* mm_addr = NULL; | ||||
|     void * mm_addr = NULL; | ||||
|     if (use_mmap) { | ||||
|         mm_addr = mmap_file(fname.c_str()); | ||||
|         if (mm_addr == NULL) { | ||||
|         mmap_file(fname.c_str(), model.mm_addr, model.mm_length); | ||||
|         if (model.mm_addr == NULL) { | ||||
|             use_mmap = false; | ||||
|         } | ||||
|         else { | ||||
|             mm_addr = model.mm_addr; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     auto & ctx = model.ctx; | ||||
| @@ -1750,6 +1767,10 @@ void llama_free(struct llama_context * ctx) { | ||||
|         ggml_free(ctx->model.ctx); | ||||
|     } | ||||
|  | ||||
|     if (ctx->model.mm_addr) { | ||||
|         munmap_file(ctx->model.mm_addr, ctx->model.mm_length); | ||||
|     } | ||||
|  | ||||
|     delete ctx; | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Slaren
					Slaren