Mirror of https://github.com/ggml-org/llama.cpp.git — last synced 2025-10-30 08:42:00 +00:00.
	opencl : no need to allocate cl_mem on heap (#1612)
This commit is contained in the branch history shown below:
		| @@ -667,7 +667,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr | |||||||
|     size_t d_size; |     size_t d_size; | ||||||
|     cl_mem d_X; |     cl_mem d_X; | ||||||
|     if (src0->backend == GGML_BACKEND_CL) { |     if (src0->backend == GGML_BACKEND_CL) { | ||||||
|         d_X = *(cl_mem*) src0->data; |         d_X = (cl_mem) src0->data; | ||||||
|     } else { |     } else { | ||||||
|         d_X = ggml_cl_pool_malloc(sizeof(ggml_fp16_t) * x_ne, &x_size, CL_MEM_READ_ONLY); |         d_X = ggml_cl_pool_malloc(sizeof(ggml_fp16_t) * x_ne, &x_size, CL_MEM_READ_ONLY); | ||||||
|     } |     } | ||||||
| @@ -743,7 +743,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr | |||||||
|     size_t d_size; |     size_t d_size; | ||||||
|     cl_mem d_X; |     cl_mem d_X; | ||||||
|     if (src0->backend == GGML_BACKEND_CL) { |     if (src0->backend == GGML_BACKEND_CL) { | ||||||
|         d_X = *(cl_mem*) src0->data; |         d_X = (cl_mem) src0->data; | ||||||
|     } else { |     } else { | ||||||
|         d_X = ggml_cl_pool_malloc(sizeof(ggml_fp16_t) * x_ne, &x_size, CL_MEM_READ_ONLY); |         d_X = ggml_cl_pool_malloc(sizeof(ggml_fp16_t) * x_ne, &x_size, CL_MEM_READ_ONLY); | ||||||
|     } |     } | ||||||
| @@ -868,7 +868,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor * | |||||||
|             if (src0->backend == GGML_BACKEND_CPU) { |             if (src0->backend == GGML_BACKEND_CPU) { | ||||||
|                 CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Q, 0, src0, i03, i02, NULL)); |                 CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Q, 0, src0, i03, i02, NULL)); | ||||||
|             } else if (src0->backend == GGML_BACKEND_CL) { |             } else if (src0->backend == GGML_BACKEND_CL) { | ||||||
|                 d_Q = *(cl_mem*) src0->data; |                 d_Q = (cl_mem) src0->data; | ||||||
|             } else { |             } else { | ||||||
|                 GGML_ASSERT(false); |                 GGML_ASSERT(false); | ||||||
|             } |             } | ||||||
| @@ -1011,14 +1011,13 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) { | |||||||
|     const size_t q_sz = ggml_type_size(type) * ne0 * ne1 * ne2 * ne3 / ggml_blck_size(type); |     const size_t q_sz = ggml_type_size(type) * ne0 * ne1 * ne2 * ne3 / ggml_blck_size(type); | ||||||
|  |  | ||||||
|     size_t q_size; |     size_t q_size; | ||||||
|     cl_mem* dst = (cl_mem*) malloc(sizeof(cl_mem)); |     cl_mem dst = ggml_cl_pool_malloc(q_sz, &q_size, CL_MEM_READ_ONLY); | ||||||
|     *dst = ggml_cl_pool_malloc(q_sz, &q_size, CL_MEM_READ_ONLY); |  | ||||||
|  |  | ||||||
|     // copy tensor to device |     // copy tensor to device | ||||||
|     for (int64_t i3 = 0; i3 < ne3; i3++) { |     for (int64_t i3 = 0; i3 < ne3; i3++) { | ||||||
|         for (int64_t i2 = 0; i2 < ne2; i2++) { |         for (int64_t i2 = 0; i2 < ne2; i2++) { | ||||||
|             int i = i3*ne2 + i2; |             int i = i3*ne2 + i2; | ||||||
|             CL_CHECK(ggml_cl_h2d_tensor_2d(queue, *dst, i*ne0*ne1, tensor, i3, i2, NULL)); |             CL_CHECK(ggml_cl_h2d_tensor_2d(queue, dst, i*ne0*ne1, tensor, i3, i2, NULL)); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
Reference in New Issue · Block a user
Author: Howard Su
Committed-by: Howard Su