mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	metal : free metal objects (#5161)
* Releasing MTLFunction references after Metal pipeline construction
* Keeping the `ggml_metal_kernel` structure
* Spacing fix
* Whitespace fix
This commit is contained in:
		 Paul Tsochantaris
					Paul Tsochantaris
				
			
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			 GitHub
						GitHub
					
				
			
						parent
						
							35dec26cc2
						
					
				
				
					commit
					d2f650cb5b
				
			
							
								
								
									
										31
									
								
								ggml-metal.m
									
									
									
									
									
								
							
							
						
						
									
										31
									
								
								ggml-metal.m
									
									
									
									
									
								
							| @@ -24,10 +24,7 @@ | |||||||
|  |  | ||||||
| #define UNUSED(x) (void)(x) | #define UNUSED(x) (void)(x) | ||||||
|  |  | ||||||
| #define GGML_METAL_MAX_KERNELS 256 |  | ||||||
|  |  | ||||||
| struct ggml_metal_kernel { | struct ggml_metal_kernel { | ||||||
|     id<MTLFunction>             function; |  | ||||||
|     id<MTLComputePipelineState> pipeline; |     id<MTLComputePipelineState> pipeline; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -159,11 +156,10 @@ struct ggml_metal_context { | |||||||
|  |  | ||||||
|     id<MTLDevice>       device; |     id<MTLDevice>       device; | ||||||
|     id<MTLCommandQueue> queue; |     id<MTLCommandQueue> queue; | ||||||
|     id<MTLLibrary>      library; |  | ||||||
|  |  | ||||||
|     dispatch_queue_t d_queue; |     dispatch_queue_t d_queue; | ||||||
|  |  | ||||||
|     struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS]; |     struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT]; | ||||||
|  |  | ||||||
|     bool support_simdgroup_reduction; |     bool support_simdgroup_reduction; | ||||||
|     bool support_simdgroup_mm; |     bool support_simdgroup_mm; | ||||||
| @@ -246,6 +242,8 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { | |||||||
|     ctx->queue  = [ctx->device newCommandQueue]; |     ctx->queue  = [ctx->device newCommandQueue]; | ||||||
|     ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); |     ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); | ||||||
|  |  | ||||||
|  |     id<MTLLibrary> metal_library; | ||||||
|  |  | ||||||
|     // load library |     // load library | ||||||
|     { |     { | ||||||
|         NSBundle * bundle = nil; |         NSBundle * bundle = nil; | ||||||
| @@ -260,7 +258,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { | |||||||
|             // pre-compiled library found |             // pre-compiled library found | ||||||
|             NSURL * libURL = [NSURL fileURLWithPath:libPath]; |             NSURL * libURL = [NSURL fileURLWithPath:libPath]; | ||||||
|             GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]); |             GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]); | ||||||
|             ctx->library = [ctx->device newLibraryWithURL:libURL error:&error]; |             metal_library = [ctx->device newLibraryWithURL:libURL error:&error]; | ||||||
|             if (error) { |             if (error) { | ||||||
|                 GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); |                 GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); | ||||||
|                 return NULL; |                 return NULL; | ||||||
| @@ -302,7 +300,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { | |||||||
|  |  | ||||||
|                 //[options setFastMathEnabled:false]; |                 //[options setFastMathEnabled:false]; | ||||||
|  |  | ||||||
|                 ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error]; |                 metal_library = [ctx->device newLibraryWithSource:src options:options error:&error]; | ||||||
|                 if (error) { |                 if (error) { | ||||||
|                     GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); |                     GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); | ||||||
|                     return NULL; |                     return NULL; | ||||||
| @@ -367,8 +365,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { | |||||||
|     { |     { | ||||||
|         NSError * error = nil; |         NSError * error = nil; | ||||||
|  |  | ||||||
|         for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) { |         for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) { | ||||||
|             ctx->kernels[i].function = nil; |  | ||||||
|             ctx->kernels[i].pipeline = nil; |             ctx->kernels[i].pipeline = nil; | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -380,10 +377,12 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { | |||||||
| #define GGML_METAL_ADD_KERNEL(e, name, supported) \ | #define GGML_METAL_ADD_KERNEL(e, name, supported) \ | ||||||
|         if (supported) { \ |         if (supported) { \ | ||||||
|             struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \ |             struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \ | ||||||
|             kernel->function = [ctx->library newFunctionWithName:@"kernel_"#name]; \ |             id<MTLFunction> metal_function = [metal_library newFunctionWithName:@"kernel_"#name]; \ | ||||||
|             kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:kernel->function error:&error]; \ |             kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:metal_function error:&error]; \ | ||||||
|  |             [metal_function release]; \ | ||||||
|             if (error) { \ |             if (error) { \ | ||||||
|                 GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \ |                 GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \ | ||||||
|  |                 [metal_library release]; \ | ||||||
|                 return NULL; \ |                 return NULL; \ | ||||||
|             } \ |             } \ | ||||||
|         } else { \ |         } else { \ | ||||||
| @@ -512,23 +511,17 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { | |||||||
|         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS,                  sum_rows,               true); |         GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS,                  sum_rows,               true); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     [metal_library release]; | ||||||
|     return ctx; |     return ctx; | ||||||
| } | } | ||||||
|  |  | ||||||
| static void ggml_metal_free(struct ggml_metal_context * ctx) { | static void ggml_metal_free(struct ggml_metal_context * ctx) { | ||||||
|     GGML_METAL_LOG_INFO("%s: deallocating\n", __func__); |     GGML_METAL_LOG_INFO("%s: deallocating\n", __func__); | ||||||
|  |  | ||||||
|     for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) { |     for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) { | ||||||
|         if (ctx->kernels[i].pipeline) { |  | ||||||
|         [ctx->kernels[i].pipeline release]; |         [ctx->kernels[i].pipeline release]; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|         if (ctx->kernels[i].function) { |  | ||||||
|             [ctx->kernels[i].function release]; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     [ctx->library release]; |  | ||||||
|     [ctx->queue release]; |     [ctx->queue release]; | ||||||
|     [ctx->device release]; |     [ctx->device release]; | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user