mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	Add OpenCL add kernel (#5151)
* Add OpenCL add kernel
* Put add kernel into a different string to stay within the MSVC string length limit, disable float16 support due to bad results
This commit is contained in:
		
							
								
								
									
										11
									
								
								ggml.c
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								ggml.c
									
									
									
									
									
								
							| @@ -7207,6 +7207,17 @@ static void ggml_compute_forward_add_f32( | ||||
|     const int ith = params->ith; | ||||
|     const int nth = params->nth; | ||||
|  | ||||
| #ifdef GGML_USE_CLBLAST | ||||
|     if (src1->backend == GGML_BACKEND_GPU) { | ||||
|         // TODO: OpenCL kernel support full broadcast | ||||
|         GGML_ASSERT(ggml_can_repeat_rows(src1, src0)); | ||||
|         if (ith == 0) { | ||||
|             ggml_cl_add(src0, src1, dst); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     const int nr  = ggml_nrows(src0); | ||||
|  | ||||
|     GGML_TENSOR_BINARY_OP_LOCALS | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 0cc4m
					0cc4m