mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-29 08:41:22 +00:00 
			
		
		
		
	metal : try to utilize more of the shared memory using smaller views
This commit is contained in:
		| @@ -23,7 +23,7 @@ | |||||||
| #include <stdbool.h> | #include <stdbool.h> | ||||||
|  |  | ||||||
| // max memory buffers that can be mapped to the device | // max memory buffers that can be mapped to the device | ||||||
| #define GGML_METAL_MAX_BUFFERS 16 | #define GGML_METAL_MAX_BUFFERS 256 | ||||||
|  |  | ||||||
| struct ggml_tensor; | struct ggml_tensor; | ||||||
| struct ggml_cgraph; | struct ggml_cgraph; | ||||||
|   | |||||||
| @@ -262,8 +262,10 @@ bool ggml_metal_add_buffer( | |||||||
|             size_aligned += (size_page - (size_aligned % size_page)); |             size_aligned += (size_page - (size_aligned % size_page)); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         const size_t max_buffer_length = ctx->device.maxBufferLength/4; | ||||||
|  |  | ||||||
|         // the buffer fits into the max buffer size allowed by the device |         // the buffer fits into the max buffer size allowed by the device | ||||||
|         if (size_aligned <= ctx->device.maxBufferLength) { |         if (size_aligned <= max_buffer_length) { | ||||||
|             ctx->buffers[ctx->n_buffers].name = name; |             ctx->buffers[ctx->n_buffers].name = name; | ||||||
|             ctx->buffers[ctx->n_buffers].data = data; |             ctx->buffers[ctx->n_buffers].data = data; | ||||||
|             ctx->buffers[ctx->n_buffers].size = size; |             ctx->buffers[ctx->n_buffers].size = size; | ||||||
| @@ -282,8 +284,8 @@ bool ggml_metal_add_buffer( | |||||||
|             // this overlap between the views will guarantee that the tensor with the maximum size will fully fit into |             // this overlap between the views will guarantee that the tensor with the maximum size will fully fit into | ||||||
|             // one of the views |             // one of the views | ||||||
|             const size_t size_ovlp = ((max_size + size_page - 1) / size_page + 1) * size_page; // round-up 2 pages just in case |             const size_t size_ovlp = ((max_size + size_page - 1) / size_page + 1) * size_page; // round-up 2 pages just in case | ||||||
|             const size_t size_step = ctx->device.maxBufferLength - size_ovlp; |             const size_t size_step = max_buffer_length - size_ovlp; | ||||||
|             const size_t size_view = ctx->device.maxBufferLength; |             const size_t size_view = max_buffer_length; | ||||||
|  |  | ||||||
|             for (size_t i = 0; i < size; i += size_step) { |             for (size_t i = 0; i < size; i += size_step) { | ||||||
|                 const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i); |                 const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov