mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	metal : try to utilize more of the shared memory using smaller views
This commit is contained in:
		| @@ -23,7 +23,7 @@ | ||||
| #include <stdbool.h> | ||||
|  | ||||
| // max memory buffers that can be mapped to the device | ||||
| -#define GGML_METAL_MAX_BUFFERS 16 | ||||
| +#define GGML_METAL_MAX_BUFFERS 256 | ||||
|  | ||||
| struct ggml_tensor; | ||||
| struct ggml_cgraph; | ||||
|   | ||||
| @@ -262,8 +262,10 @@ bool ggml_metal_add_buffer( | ||||
|             size_aligned += (size_page - (size_aligned % size_page)); | ||||
|         } | ||||
|  | ||||
| +        const size_t max_buffer_length = ctx->device.maxBufferLength/4; | ||||
| + | ||||
|         // the buffer fits into the max buffer size allowed by the device | ||||
| -        if (size_aligned <= ctx->device.maxBufferLength) { | ||||
| +        if (size_aligned <= max_buffer_length) { | ||||
|             ctx->buffers[ctx->n_buffers].name = name; | ||||
|             ctx->buffers[ctx->n_buffers].data = data; | ||||
|             ctx->buffers[ctx->n_buffers].size = size; | ||||
| @@ -282,8 +284,8 @@ bool ggml_metal_add_buffer( | ||||
|             // this overlap between the views will guarantee that the tensor with the maximum size will fully fit into | ||||
|             // one of the views | ||||
|             const size_t size_ovlp = ((max_size + size_page - 1) / size_page + 1) * size_page; // round-up 2 pages just in case | ||||
| -            const size_t size_step = ctx->device.maxBufferLength - size_ovlp; | ||||
| -            const size_t size_view = ctx->device.maxBufferLength; | ||||
| +            const size_t size_step = max_buffer_length - size_ovlp; | ||||
| +            const size_t size_view = max_buffer_length; | ||||
|  | ||||
|             for (size_t i = 0; i < size; i += size_step) { | ||||
|                 const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov