mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	ggml : add max buffer sizes to opencl and metal backends (#5181)
This commit is contained in:
		
							
								
								
									
										12
									
								
								ggml-metal.m
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								ggml-metal.m
									
									
									
									
									
								
							| @@ -2375,6 +2375,16 @@ GGML_CALL static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backen | |||||||
|     UNUSED(buft); |     UNUSED(buft); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | GGML_CALL static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { | ||||||
|  |     id<MTLDevice> device = ggml_backend_metal_get_device(); | ||||||
|  |     size_t max_size = device.maxBufferLength; | ||||||
|  |     ggml_backend_metal_free_device(); | ||||||
|  |  | ||||||
|  |     return max_size; | ||||||
|  |  | ||||||
|  |     UNUSED(buft); | ||||||
|  | } | ||||||
|  |  | ||||||
| GGML_CALL static bool ggml_backend_metal_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) { | GGML_CALL static bool ggml_backend_metal_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) { | ||||||
|     return ggml_backend_is_metal(backend) || ggml_backend_is_cpu(backend); |     return ggml_backend_is_metal(backend) || ggml_backend_is_cpu(backend); | ||||||
|  |  | ||||||
| @@ -2393,7 +2403,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) { | |||||||
|             /* .get_name         = */ ggml_backend_metal_buffer_type_get_name, |             /* .get_name         = */ ggml_backend_metal_buffer_type_get_name, | ||||||
|             /* .alloc_buffer     = */ ggml_backend_metal_buffer_type_alloc_buffer, |             /* .alloc_buffer     = */ ggml_backend_metal_buffer_type_alloc_buffer, | ||||||
|             /* .get_alignment    = */ ggml_backend_metal_buffer_type_get_alignment, |             /* .get_alignment    = */ ggml_backend_metal_buffer_type_get_alignment, | ||||||
|             /* .get_max_size     = */ NULL, // TODO: return device.maxBufferLength |             /* .get_max_size     = */ ggml_backend_metal_buffer_type_get_max_size, | ||||||
|             /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes |             /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes | ||||||
|             /* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend, |             /* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend, | ||||||
|             /* .is_host          = */ ggml_backend_metal_buffer_type_is_host, |             /* .is_host          = */ ggml_backend_metal_buffer_type_is_host, | ||||||
|   | |||||||
| @@ -2125,6 +2125,15 @@ static size_t ggml_backend_opencl_buffer_type_get_alignment(ggml_backend_buffer_ | |||||||
|     GGML_UNUSED(buffer_type); |     GGML_UNUSED(buffer_type); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static size_t ggml_backend_opencl_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) { | ||||||
|  |     static size_t max_size = -1; | ||||||
|  |     if (max_size == (size_t)-1) { | ||||||
|  |         ggml_cl_init(); | ||||||
|  |         clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_size, NULL); | ||||||
|  |     } | ||||||
|  |     return max_size; | ||||||
|  | } | ||||||
|  |  | ||||||
| static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) { | static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) { | ||||||
|     //return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend |     //return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend | ||||||
|     return ggml_backend_is_cpu(backend); |     return ggml_backend_is_cpu(backend); | ||||||
| @@ -2136,7 +2145,7 @@ static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = { | |||||||
|     /* .get_name         = */ ggml_backend_opencl_buffer_type_name, |     /* .get_name         = */ ggml_backend_opencl_buffer_type_name, | ||||||
|     /* .alloc_buffer     = */ ggml_backend_opencl_buffer_type_alloc_buffer, |     /* .alloc_buffer     = */ ggml_backend_opencl_buffer_type_alloc_buffer, | ||||||
|     /* .get_alignment    = */ ggml_backend_opencl_buffer_type_get_alignment, |     /* .get_alignment    = */ ggml_backend_opencl_buffer_type_get_alignment, | ||||||
|     /* .get_max_size     = */ NULL, // TODO: return from device info |     /* .get_max_size     = */ ggml_backend_opencl_buffer_type_get_max_size, | ||||||
|     /* .get_alloc_size   = */ NULL, |     /* .get_alloc_size   = */ NULL, | ||||||
|     /* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend, |     /* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend, | ||||||
|     /* .is_host          = */ NULL, |     /* .is_host          = */ NULL, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 slaren
					slaren