clip : use FA (#16837)

* clip : use FA
* cont : add warning about unsupported ops
* implement "auto" mode for clip flash attn
* clip : print more detailed op support info during warmup
* cont : remove obsolete comment [no ci]
* improve debugging message
* trailing space
* metal : remove stray return

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
2025-11-09 10:17:06 +00:00 · 2025-11-02 22:21:48 +02:00
parent cd5e3b5754
commit 2f966b8ed8
9 changed files with 194 additions and 43 deletions
--- a/tools/mtmd/clip.h
+++ b/tools/mtmd/clip.h
@@ -1,6 +1,7 @@
 #pragma once

 #include "ggml.h"
+
 #include <stddef.h>
 #include <stdint.h>

@@ -22,9 +23,16 @@ enum clip_modality {
    CLIP_MODALITY_AUDIO,
 };

+enum clip_flash_attn_type { // flash-attention mode selector carried in clip_context_params
+    CLIP_FLASH_ATTN_TYPE_AUTO     = -1, // "auto" mode; NOTE(review): presumably resolved to enabled/disabled by the implementation - confirm in clip.cpp
+    CLIP_FLASH_ATTN_TYPE_DISABLED = 0, // explicit "disabled" setting (value 0)
+    CLIP_FLASH_ATTN_TYPE_ENABLED  = 1, // explicit "enabled" setting (value 1)
+};
+
 struct clip_context_params {
    bool use_gpu;
    enum ggml_log_level verbosity;
+    enum clip_flash_attn_type flash_attn_type; // requested flash-attention mode (auto / disabled / enabled)
 };

 struct clip_init_result {