rename has_llava_projector to has_minicpmv_projector

2025-11-10 10:27:03 +00:00 · 2024-05-23 20:00:45 +08:00
parent 7a49a6f6dc
commit c536fa6ef9
2 changed files with 12 additions and 12 deletions
--- a/examples/minicpmv/clip.cpp
+++ b/examples/minicpmv/clip.cpp
@@ -75,7 +75,7 @@ static std::string format(const char * fmt, ...) {
 #define KEY_DESCRIPTION    "general.description"
 #define KEY_HAS_TEXT_ENC   "clip.has_text_encoder"
 #define KEY_HAS_VIS_ENC    "clip.has_vision_encoder"
-#define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector"
+#define KEY_HAS_LLAVA_PROJ "clip.has_minicpmv_projector"
 #define KEY_USE_GELU       "clip.use_gelu"
 #define KEY_N_EMBD         "clip.%s.embedding_length"
 #define KEY_N_FF           "clip.%s.feed_forward_length"
@@ -526,7 +526,7 @@ struct clip_vision_model {
 struct clip_ctx {
    bool has_text_encoder    = false;
    bool has_vision_encoder  = false;
-    bool has_llava_projector = false;
+    bool has_minicpmv_projector = false;

    struct clip_vision_model vision_model;
    projector_type proj_type = PROJECTOR_TYPE_MLP;
@@ -606,7 +606,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32

    const int batch_size = imgs->size;

-    if (ctx->has_llava_projector) {
+    if (ctx->has_minicpmv_projector) {
        GGML_ASSERT(batch_size == 1);
    }

@@ -1124,10 +1124,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1, s

        idx = gguf_find_key(ctx, KEY_HAS_LLAVA_PROJ);
        if (idx != -1) {
-            new_clip->has_llava_projector = gguf_get_val_bool(ctx, idx);
+            new_clip->has_minicpmv_projector = gguf_get_val_bool(ctx, idx);
        }

-        GGML_ASSERT(new_clip->has_llava_projector); // see monatis/clip.cpp for image and/or text encoding for semantic search
+        GGML_ASSERT(new_clip->has_minicpmv_projector); // see monatis/clip.cpp for image and/or text encoding for semantic search
        GGML_ASSERT(new_clip->has_vision_encoder);
        GGML_ASSERT(!new_clip->has_text_encoder);

@@ -1137,7 +1137,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1, s
        if (verbosity >= 1) {
            LOG_TEE("%s: text_encoder:   %d\n", __func__, new_clip->has_text_encoder);
            LOG_TEE("%s: vision_encoder: %d\n", __func__, new_clip->has_vision_encoder);
-            LOG_TEE("%s: llava_projector:  %d\n", __func__, new_clip->has_llava_projector);
+            LOG_TEE("%s: llava_projector:  %d\n", __func__, new_clip->has_minicpmv_projector);
            LOG_TEE("%s: model size:     %.2f MB\n", __func__, model_size / 1024.0 / 1024.0);
            LOG_TEE("%s: metadata size:  %.2f MB\n", __func__, ggml_get_mem_size(meta) / 1024.0 / 1024.0);
        }
@@ -1939,7 +1939,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
    }

    int batch_size = imgs->size;
-    if (ctx->has_llava_projector) {
+    if (ctx->has_minicpmv_projector) {
        GGML_ASSERT(batch_size == 1); // TODO: support multiple images
    }