mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-28 08:31:25 +00:00 
			
		
		
		
	Unmap the file in llama_free
This commit is contained in:
		
							
								
								
									
										41
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										41
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -149,6 +149,10 @@ struct llama_model { | |||||||
|     // the model memory buffer |     // the model memory buffer | ||||||
|     std::vector<uint8_t> buf; |     std::vector<uint8_t> buf; | ||||||
|  |  | ||||||
|  |     // model memory mapped file | ||||||
|  |     void * mm_addr; | ||||||
|  |     size_t mm_length; | ||||||
|  |  | ||||||
|     // tensors |     // tensors | ||||||
|     int n_loaded; |     int n_loaded; | ||||||
|     std::unordered_map<std::string, struct ggml_tensor *> tensors; |     std::unordered_map<std::string, struct ggml_tensor *> tensors; | ||||||
| @@ -296,22 +300,32 @@ struct llama_context_params llama_context_default_params() { | |||||||
| // model loading | // model loading | ||||||
| // | // | ||||||
|  |  | ||||||
| static void * mmap_file(const char* fname) { | static void mmap_file(const char* fname, void * &mm_addr, size_t &mm_length) { | ||||||
| #if defined(MAP_FAILED) | #if defined(MAP_FAILED) | ||||||
|     // POSIX mmap |     // POSIX | ||||||
|     int fd = open(fname, O_RDONLY); |     int fd = open(fname, O_RDONLY); | ||||||
|     size_t len = lseek(fd, 0, SEEK_END); |     mm_length = lseek(fd, 0, SEEK_END); | ||||||
|     void * mm_addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0); |     mm_addr = mmap(NULL, mm_length, PROT_READ, MAP_SHARED, fd, 0); | ||||||
|  |     close(fd); | ||||||
|     if (mm_addr == MAP_FAILED) { |     if (mm_addr == MAP_FAILED) { | ||||||
|         perror("mmap failed"); |         perror("mmap failed"); | ||||||
|         mm_addr = NULL; |         mm_addr = NULL; | ||||||
|  |         mm_length = 0; | ||||||
|     } |     } | ||||||
|     close(fd); |  | ||||||
|     return mm_addr; |  | ||||||
| #else | #else | ||||||
|     // TODO: windows support |     // TODO: windows support | ||||||
|     (void)(fname); // suppress warnings |     (void)(fname); // suppress warnings | ||||||
|     return NULL; | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static void munmap_file(void * addr, size_t length) { | ||||||
|  | #if defined(MAP_FAILED) | ||||||
|  |     // POSIX | ||||||
|  |     munmap(addr, length); | ||||||
|  | #else | ||||||
|  |     // TODO: windows support | ||||||
|  |     (void)(addr); // suppress warnings | ||||||
|  |     (void)(length); | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -480,12 +494,15 @@ static bool llama_model_load( | |||||||
|     bool use_mmap = (n_parts == 1); |     bool use_mmap = (n_parts == 1); | ||||||
|  |  | ||||||
|     // try to memory map the model file |     // try to memory map the model file | ||||||
|     void* mm_addr = NULL; |     void * mm_addr = NULL; | ||||||
|     if (use_mmap) { |     if (use_mmap) { | ||||||
|         mm_addr = mmap_file(fname.c_str()); |         mmap_file(fname.c_str(), model.mm_addr, model.mm_length); | ||||||
|         if (mm_addr == NULL) { |         if (model.mm_addr == NULL) { | ||||||
|             use_mmap = false; |             use_mmap = false; | ||||||
|         } |         } | ||||||
|  |         else { | ||||||
|  |             mm_addr = model.mm_addr; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     auto & ctx = model.ctx; |     auto & ctx = model.ctx; | ||||||
| @@ -1750,6 +1767,10 @@ void llama_free(struct llama_context * ctx) { | |||||||
|         ggml_free(ctx->model.ctx); |         ggml_free(ctx->model.ctx); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     if (ctx->model.mm_addr) { | ||||||
|  |         munmap_file(ctx->model.mm_addr, ctx->model.mm_length); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     delete ctx; |     delete ctx; | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Slaren
					Slaren