Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
			
		
		
		
	metal : use residency sets (#11427)
* metal : use residency sets ggml-ci
* metal : restore commandBufferWithUnretainedReferences calls [no ci]
* metal : release descriptors ggml-ci
* metal : check env GGML_METAL_NO_RESIDENCY ggml-ci
* metal : fix build + clean-up ggml-ci
This commit is contained in:
		| @@ -19,7 +19,10 @@ | |||||||
| // max number of MTLCommandBuffer used to submit a graph for processing | // max number of MTLCommandBuffer used to submit a graph for processing | ||||||
| #define GGML_METAL_MAX_COMMAND_BUFFERS 8 | #define GGML_METAL_MAX_COMMAND_BUFFERS 8 | ||||||
|  |  | ||||||
| #define UNUSED(x) (void)(x) | // create residency sets only on macOS >= 15.0 | ||||||
|  | #if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000 | ||||||
|  | #define GGML_METAL_HAS_RESIDENCY_SETS 1 | ||||||
|  | #endif | ||||||
|  |  | ||||||
| // globals | // globals | ||||||
|  |  | ||||||
| @@ -39,6 +42,7 @@ static struct ggml_backend_metal_device_context { | |||||||
|  |  | ||||||
|     bool has_simdgroup_reduction; |     bool has_simdgroup_reduction; | ||||||
|     bool has_simdgroup_mm; |     bool has_simdgroup_mm; | ||||||
|  |     bool has_residency_sets; | ||||||
|     bool has_bfloat; |     bool has_bfloat; | ||||||
|     bool use_bfloat; |     bool use_bfloat; | ||||||
|  |  | ||||||
| @@ -48,6 +52,7 @@ static struct ggml_backend_metal_device_context { | |||||||
|     /*.mtl_device_ref_count    =*/ 0, |     /*.mtl_device_ref_count    =*/ 0, | ||||||
|     /*.has_simdgroup_reduction =*/ false, |     /*.has_simdgroup_reduction =*/ false, | ||||||
|     /*.has_simdgroup_mm        =*/ false, |     /*.has_simdgroup_mm        =*/ false, | ||||||
|  |     /*.has_residency_sets      =*/ false, | ||||||
|     /*.has_bfloat              =*/ false, |     /*.has_bfloat              =*/ false, | ||||||
|     /*.use_bfloat              =*/ false, |     /*.use_bfloat              =*/ false, | ||||||
|     /*.name                    =*/ "", |     /*.name                    =*/ "", | ||||||
| @@ -65,6 +70,10 @@ static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_dev | |||||||
|  |  | ||||||
|         ctx->has_simdgroup_mm = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7]; |         ctx->has_simdgroup_mm = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7]; | ||||||
|  |  | ||||||
|  | #if defined(GGML_METAL_HAS_RESIDENCY_SETS) | ||||||
|  |         ctx->has_residency_sets = getenv("GGML_METAL_NO_RESIDENCY") == NULL; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|         ctx->has_bfloat  = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML]; |         ctx->has_bfloat  = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML]; | ||||||
|         ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6]; |         ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6]; | ||||||
|  |  | ||||||
| @@ -483,6 +492,11 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de | |||||||
|     GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]); |     GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]); | ||||||
|  |  | ||||||
|     ctx->queue  = [device newCommandQueue]; |     ctx->queue  = [device newCommandQueue]; | ||||||
|  |     if (ctx->queue == nil) { | ||||||
|  |         GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); |     ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); | ||||||
|  |  | ||||||
|     id<MTLLibrary> metal_library; |     id<MTLLibrary> metal_library; | ||||||
| @@ -649,6 +663,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de | |||||||
|  |  | ||||||
|     GGML_LOG_INFO("%s: simdgroup reduction   = %s\n", __func__, ctx_dev->has_simdgroup_reduction     ? "true" : "false"); |     GGML_LOG_INFO("%s: simdgroup reduction   = %s\n", __func__, ctx_dev->has_simdgroup_reduction     ? "true" : "false"); | ||||||
|     GGML_LOG_INFO("%s: simdgroup matrix mul. = %s\n", __func__, ctx_dev->has_simdgroup_mm            ? "true" : "false"); |     GGML_LOG_INFO("%s: simdgroup matrix mul. = %s\n", __func__, ctx_dev->has_simdgroup_mm            ? "true" : "false"); | ||||||
|  |     GGML_LOG_INFO("%s: has residency sets    = %s\n", __func__, ctx_dev->has_residency_sets          ? "true" : "false"); | ||||||
|     GGML_LOG_INFO("%s: has bfloat            = %s\n", __func__, ctx_dev->has_bfloat                  ? "true" : "false"); |     GGML_LOG_INFO("%s: has bfloat            = %s\n", __func__, ctx_dev->has_bfloat                  ? "true" : "false"); | ||||||
|     GGML_LOG_INFO("%s: use bfloat            = %s\n", __func__, ctx_dev->use_bfloat                  ? "true" : "false"); |     GGML_LOG_INFO("%s: use bfloat            = %s\n", __func__, ctx_dev->use_bfloat                  ? "true" : "false"); | ||||||
|     GGML_LOG_INFO("%s: hasUnifiedMemory      = %s\n", __func__, ctx_dev->mtl_device.hasUnifiedMemory ? "true" : "false"); |     GGML_LOG_INFO("%s: hasUnifiedMemory      = %s\n", __func__, ctx_dev->mtl_device.hasUnifiedMemory ? "true" : "false"); | ||||||
| @@ -1035,8 +1050,70 @@ struct ggml_backend_metal_buffer_context { | |||||||
|     // multiple buffers are used only to avoid the maximum buffer size limitation when using mmap |     // multiple buffers are used only to avoid the maximum buffer size limitation when using mmap | ||||||
|     int n_buffers; |     int n_buffers; | ||||||
|     struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS]; |     struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS]; | ||||||
|  |  | ||||||
|  |     // optional MTLResidencySet | ||||||
|  |     id rset; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | // rset init | ||||||
|  | static bool ggml_backend_metal_buffer_rset_init( | ||||||
|  |         struct ggml_backend_metal_buffer_context * ctx, | ||||||
|  |         struct ggml_backend_metal_device_context * ctx_dev, | ||||||
|  |         id<MTLDevice> device) { | ||||||
|  |     ctx->rset = nil; | ||||||
|  |  | ||||||
|  |     if (!ctx_dev->has_residency_sets) { | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | #if defined(GGML_METAL_HAS_RESIDENCY_SETS) | ||||||
|  |     if (@available(macOS 15.0, *)) { | ||||||
|  |         MTLResidencySetDescriptor * desc = [[MTLResidencySetDescriptor alloc] init]; | ||||||
|  |         desc.label = @"ggml_backend_metal"; | ||||||
|  |         desc.initialCapacity = ctx->n_buffers; | ||||||
|  |  | ||||||
|  |         NSError * error; | ||||||
|  |         ctx->rset = [device newResidencySetWithDescriptor:desc error:&error]; | ||||||
|  |         if (error) { | ||||||
|  |             GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); | ||||||
|  |             [desc release]; | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         [desc release]; | ||||||
|  |  | ||||||
|  |         for (int i = 0; i < ctx->n_buffers; i++) { | ||||||
|  |             [ctx->rset addAllocation:ctx->buffers[i].metal]; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         [ctx->rset commit]; | ||||||
|  |         [ctx->rset requestResidency]; | ||||||
|  |  | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
|  | #else | ||||||
|  |     GGML_UNUSED(ctx_dev); | ||||||
|  |     GGML_UNUSED(device); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // rset free | ||||||
|  | static void ggml_backend_metal_buffer_rset_free(struct ggml_backend_metal_buffer_context * ctx) { | ||||||
|  | #if defined(GGML_METAL_HAS_RESIDENCY_SETS) | ||||||
|  |     if (@available(macOS 15.0, *)) { | ||||||
|  |         if (ctx->rset) { | ||||||
|  |             [ctx->rset endResidency]; | ||||||
|  |             [ctx->rset removeAllAllocations]; | ||||||
|  |             [ctx->rset release]; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | #else | ||||||
|  |     GGML_UNUSED(ctx); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
| // finds the Metal buffer that contains the tensor data on the GPU device | // finds the Metal buffer that contains the tensor data on the GPU device | ||||||
| // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the | // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the | ||||||
| // Metal buffer based on the host memory pointer | // Metal buffer based on the host memory pointer | ||||||
| @@ -4176,6 +4253,8 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer) | |||||||
|     for (int i = 0; i < ctx->n_buffers; i++) { |     for (int i = 0; i < ctx->n_buffers; i++) { | ||||||
|         [ctx->buffers[i].metal release]; |         [ctx->buffers[i].metal release]; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     ggml_backend_metal_buffer_rset_free(ctx); | ||||||
|     ggml_backend_metal_device_rel(buffer->buft->device->context); |     ggml_backend_metal_device_rel(buffer->buft->device->context); | ||||||
|  |  | ||||||
|     if (ctx->owned) { |     if (ctx->owned) { | ||||||
| @@ -4198,19 +4277,19 @@ static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) { | |||||||
| static void ggml_backend_metal_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { | static void ggml_backend_metal_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { | ||||||
|     memset((char *)tensor->data + offset, value, size); |     memset((char *)tensor->data + offset, value, size); | ||||||
|  |  | ||||||
|     UNUSED(buffer); |     GGML_UNUSED(buffer); | ||||||
| } | } | ||||||
|  |  | ||||||
| static void ggml_backend_metal_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { | static void ggml_backend_metal_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { | ||||||
|     memcpy((char *)tensor->data + offset, data, size); |     memcpy((char *)tensor->data + offset, data, size); | ||||||
|  |  | ||||||
|     UNUSED(buffer); |     GGML_UNUSED(buffer); | ||||||
| } | } | ||||||
|  |  | ||||||
| static void ggml_backend_metal_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { | static void ggml_backend_metal_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { | ||||||
|     memcpy(data, (const char *)tensor->data + offset, size); |     memcpy(data, (const char *)tensor->data + offset, size); | ||||||
|  |  | ||||||
|     UNUSED(buffer); |     GGML_UNUSED(buffer); | ||||||
| } | } | ||||||
|  |  | ||||||
| static bool ggml_backend_metal_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) { | static bool ggml_backend_metal_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) { | ||||||
| @@ -4220,7 +4299,7 @@ static bool ggml_backend_metal_buffer_cpy_tensor(ggml_backend_buffer_t buffer, c | |||||||
|     } |     } | ||||||
|     return false; |     return false; | ||||||
|  |  | ||||||
|     UNUSED(buffer); |     GGML_UNUSED(buffer); | ||||||
| } | } | ||||||
|  |  | ||||||
| static void ggml_backend_metal_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { | static void ggml_backend_metal_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { | ||||||
| @@ -4246,7 +4325,7 @@ static struct ggml_backend_buffer_i ggml_backend_metal_buffer_i = { | |||||||
| static const char * ggml_backend_metal_buffer_type_get_name(ggml_backend_buffer_type_t buft) { | static const char * ggml_backend_metal_buffer_type_get_name(ggml_backend_buffer_type_t buft) { | ||||||
|     return "Metal"; |     return "Metal"; | ||||||
|  |  | ||||||
|     UNUSED(buft); |     GGML_UNUSED(buft); | ||||||
| } | } | ||||||
|  |  | ||||||
| static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t size_aligned) { | static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t size_aligned) { | ||||||
| @@ -4270,8 +4349,8 @@ static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t s | |||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
| #endif | #endif | ||||||
|     UNUSED(device); |     GGML_UNUSED(device); | ||||||
|     UNUSED(size_aligned); |     GGML_UNUSED(size_aligned); | ||||||
| } | } | ||||||
|  |  | ||||||
| static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { | static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { | ||||||
| @@ -4284,7 +4363,8 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba | |||||||
|         size_aligned += (size_page - (size_aligned % size_page)); |         size_aligned += (size_page - (size_aligned % size_page)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     id<MTLDevice> device = ggml_backend_metal_device_acq(buft->device->context); |     struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device->context; | ||||||
|  |     id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev); | ||||||
|  |  | ||||||
|     ctx->all_data = ggml_metal_host_malloc(size_aligned); |     ctx->all_data = ggml_metal_host_malloc(size_aligned); | ||||||
|     ctx->all_size = size_aligned; |     ctx->all_size = size_aligned; | ||||||
| @@ -4307,7 +4387,14 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba | |||||||
|     if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) { |     if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) { | ||||||
|         GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0); |         GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0); | ||||||
|         free(ctx); |         free(ctx); | ||||||
|         ggml_backend_metal_device_rel(buft->device->context); |         ggml_backend_metal_device_rel(ctx_dev); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) { | ||||||
|  |         GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__); | ||||||
|  |         free(ctx); | ||||||
|  |         ggml_backend_metal_device_rel(ctx_dev); | ||||||
|         return NULL; |         return NULL; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -4318,7 +4405,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba | |||||||
|  |  | ||||||
| static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { | static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { | ||||||
|     return 32; |     return 32; | ||||||
|     UNUSED(buft); |     GGML_UNUSED(buft); | ||||||
| } | } | ||||||
|  |  | ||||||
| static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { | static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { | ||||||
| @@ -4328,13 +4415,13 @@ static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_ty | |||||||
|  |  | ||||||
|     return max_size; |     return max_size; | ||||||
|  |  | ||||||
|     UNUSED(buft); |     GGML_UNUSED(buft); | ||||||
| } | } | ||||||
|  |  | ||||||
| static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) { | static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) { | ||||||
|     return true; |     return true; | ||||||
|  |  | ||||||
|     UNUSED(buft); |     GGML_UNUSED(buft); | ||||||
| } | } | ||||||
|  |  | ||||||
| ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) { | ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) { | ||||||
| @@ -4357,7 +4444,7 @@ ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) { | |||||||
| static const char * ggml_backend_metal_buffer_from_ptr_type_get_name(ggml_backend_buffer_type_t buft) { | static const char * ggml_backend_metal_buffer_from_ptr_type_get_name(ggml_backend_buffer_type_t buft) { | ||||||
|     return "Metal_Mapped"; |     return "Metal_Mapped"; | ||||||
|  |  | ||||||
|     UNUSED(buft); |     GGML_UNUSED(buft); | ||||||
| } | } | ||||||
|  |  | ||||||
| static ggml_backend_buffer_type_t ggml_backend_metal_buffer_from_ptr_type(void) { | static ggml_backend_buffer_type_t ggml_backend_metal_buffer_from_ptr_type(void) { | ||||||
| @@ -4400,7 +4487,8 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz | |||||||
|         size_aligned += (size_page - (size_aligned % size_page)); |         size_aligned += (size_page - (size_aligned % size_page)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     id<MTLDevice> device = ggml_backend_metal_device_acq(&g_ggml_ctx_dev_main); |     struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main; | ||||||
|  |     id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev); | ||||||
|  |  | ||||||
|     // the buffer fits into the max buffer size allowed by the device |     // the buffer fits into the max buffer size allowed by the device | ||||||
|     if (size_aligned <= device.maxBufferLength) { |     if (size_aligned <= device.maxBufferLength) { | ||||||
| @@ -4453,6 +4541,13 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) { | ||||||
|  |         GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__); | ||||||
|  |         free(ctx); | ||||||
|  |         ggml_backend_metal_device_rel(ctx_dev); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size); |     return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -4461,7 +4556,7 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz | |||||||
| static const char * ggml_backend_metal_name(ggml_backend_t backend) { | static const char * ggml_backend_metal_name(ggml_backend_t backend) { | ||||||
|     return "Metal"; |     return "Metal"; | ||||||
|  |  | ||||||
|     UNUSED(backend); |     GGML_UNUSED(backend); | ||||||
| } | } | ||||||
|  |  | ||||||
| static void ggml_backend_metal_free(ggml_backend_t backend) { | static void ggml_backend_metal_free(ggml_backend_t backend) { | ||||||
| @@ -4766,6 +4861,13 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) { | ||||||
|  |         GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__); | ||||||
|  |         free(ctx); | ||||||
|  |         ggml_backend_metal_device_rel(ctx_dev); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size); |     return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -4779,7 +4881,7 @@ static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml | |||||||
|     return buft->iface.get_name == ggml_backend_metal_buffer_type_get_name || |     return buft->iface.get_name == ggml_backend_metal_buffer_type_get_name || | ||||||
|             buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name; |             buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name; | ||||||
|  |  | ||||||
|     UNUSED(dev); |     GGML_UNUSED(dev); | ||||||
| } | } | ||||||
|  |  | ||||||
| static bool ggml_backend_metal_device_offload_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { | static bool ggml_backend_metal_device_offload_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov