Add OpenCL add kernel (#5151)

* Add OpenCL add kernel
* Put the add kernel into a separate string to stay within the MSVC string length limit; disable float16 support due to bad results
2025-10-30 08:42:00 +00:00 · 2024-01-26 23:07:32 +01:00
parent bbe7c56c99
commit a1d6df129b
3 changed files with 96 additions and 3 deletions
--- a/ggml.c
+++ b/ggml.c
@@ -7207,6 +7207,17 @@ static void ggml_compute_forward_add_f32(
    const int ith = params->ith;
    const int nth = params->nth;

+#ifdef GGML_USE_CLBLAST
+    if (src1->backend == GGML_BACKEND_GPU) {
+        // TODO: OpenCL kernel support full broadcast
+        GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
+        if (ith == 0) {
+            ggml_cl_add(src0, src1, dst);
+        }
+        return;
+    }
+#endif
+
    const int nr  = ggml_nrows(src0);

    GGML_TENSOR_BINARY_OP_LOCALS