clip : use FA (#16837)

* clip : use FA

* cont : add warning about unsupported ops

* implement "auto" mode for clip flash attn

* clip : print more detailed op support info during warmup

* cont : remove obsolete comment [no ci]

* improve debugging message

* trailing space

* metal : remove stray return

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
This commit is contained in:
Georgi Gerganov
2025-11-02 22:21:48 +02:00
committed by GitHub
parent cd5e3b5754
commit 2f966b8ed8
9 changed files with 194 additions and 43 deletions

View File

@@ -1,6 +1,7 @@
#pragma once
#include "ggml.h"
#include <stddef.h>
#include <stdint.h>
@@ -22,9 +23,16 @@ enum clip_modality {
CLIP_MODALITY_AUDIO,
};
enum clip_flash_attn_type {
CLIP_FLASH_ATTN_TYPE_AUTO = -1,
CLIP_FLASH_ATTN_TYPE_DISABLED = 0,
CLIP_FLASH_ATTN_TYPE_ENABLED = 1,
};
struct clip_context_params {
bool use_gpu;
enum ggml_log_level verbosity;
enum clip_flash_attn_type flash_attn_type;
};
struct clip_init_result {