	llava : fix bug in minicpm-v code (#11513)
* fix bug in minicpm-v code
* update readme of minicpm-v
		| @@ -5,13 +5,25 @@ Currently, this readme only supports minicpm-omni's image capabilities, and we w | ||||
|  | ||||
| Download the [MiniCPM-o-2_6](https://huggingface.co/openbmb/MiniCPM-o-2_6) PyTorch model from Hugging Face into the "MiniCPM-o-2_6" folder. | ||||
|  | ||||
|  | ||||
| ### Build llama.cpp | ||||
| Readme last updated: 2025-02-06 | ||||
|  | ||||
| If the usage differs from what is described here, refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md). | ||||
|  | ||||
| Clone llama.cpp: | ||||
| ```bash | ||||
| git clone git@github.com:OpenBMB/llama.cpp.git | ||||
| git clone https://github.com/ggerganov/llama.cpp | ||||
| cd llama.cpp | ||||
| git checkout minicpm-omni | ||||
| ``` | ||||
|  | ||||
| Build llama.cpp using `CMake`: | ||||
| ```bash | ||||
| cmake -B build | ||||
| cmake --build build --config Release | ||||
| ``` | ||||
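|  | ||||
| The CMake build above places its binaries under `build/bin`, which is why the commands later in this readme call `./build/bin/llama-quantize` and `./build/bin/llama-minicpmv-cli`. A minimal check that the tools were actually built (assuming the default build options): | ||||
|  | ||||
| ```bash | ||||
| # verify the binaries used below exist after the CMake build | ||||
| ls build/bin/llama-quantize build/bin/llama-minicpmv-cli | ||||
| ``` | ||||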
|  | ||||
|  | ||||
| ### Usage of MiniCPM-o 2.6 | ||||
|  | ||||
| Convert the PyTorch model to GGUF files (you can also download the pre-converted [gguf](https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf) files we provide) | ||||
| @@ -22,25 +34,15 @@ python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM- | ||||
| python ./convert_hf_to_gguf.py ../MiniCPM-o-2_6/model | ||||
|  | ||||
| # quantize int4 version | ||||
| ./llama-quantize ../MiniCPM-o-2_6/model/ggml-model-f16.gguf ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M | ||||
| ./build/bin/llama-quantize ../MiniCPM-o-2_6/model/ggml-model-f16.gguf ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M | ||||
| ``` | ||||
|  | ||||
| Build llama.cpp using `CMake`: | ||||
| https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md | ||||
|  | ||||
| ```bash | ||||
| cmake -B build | ||||
| cmake --build build --config Release | ||||
| ``` | ||||
|  | ||||
| Inference on Linux or Mac | ||||
| ``` | ||||
| ```bash | ||||
| # run f16 version | ||||
| ./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" | ||||
| ./build/bin/llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" | ||||
|  | ||||
| # run quantized int4 version | ||||
| ./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg  -p "What is in the image?" | ||||
|  | ||||
| # or run in interactive mode | ||||
| ./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i | ||||
| ./build/bin/llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg  -p "What is in the image?" | ||||
| ``` | ||||
|   | ||||
| @@ -4,13 +4,26 @@ | ||||
|  | ||||
| Download the [MiniCPM-Llama3-V-2_5](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5) PyTorch model from Hugging Face into the "MiniCPM-Llama3-V-2_5" folder. | ||||
|  | ||||
|  | ||||
| ### Build llama.cpp | ||||
| Readme last updated: 2025-02-06 | ||||
|  | ||||
| If the usage differs from what is described here, refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md). | ||||
|  | ||||
| Clone llama.cpp: | ||||
| ```bash | ||||
| git clone https://github.com/ggml-org/llama.cpp | ||||
| cd llama.cpp | ||||
| ``` | ||||
|  | ||||
| ### Usage | ||||
| Build llama.cpp using `CMake`: | ||||
| ```bash | ||||
| cmake -B build | ||||
| cmake --build build --config Release | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ### Usage of MiniCPM-Llama3-V 2.5 | ||||
|  | ||||
| Convert the PyTorch model to GGUF files (you can also download the pre-converted [gguf](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf) files we provide) | ||||
|  | ||||
| @@ -20,80 +33,15 @@ python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM- | ||||
| python ./convert_hf_to_gguf.py ../MiniCPM-Llama3-V-2_5/model | ||||
|  | ||||
| # quantize int4 version | ||||
| ./llama-quantize ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf Q4_K_M | ||||
| ./build/bin/llama-quantize ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf Q4_K_M | ||||
| ``` | ||||
|  | ||||
| Build for Linux or Mac | ||||
|  | ||||
| ```bash | ||||
| make | ||||
| make llama-minicpmv-cli | ||||
| ``` | ||||
|  | ||||
| Inference on Linux or Mac | ||||
| ``` | ||||
| ```bash | ||||
| # run f16 version | ||||
| ./llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" | ||||
| ./build/bin/llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" | ||||
|  | ||||
| # run quantized int4 version | ||||
| ./llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg  -p "What is in the image?" | ||||
|  | ||||
| # or run in interactive mode | ||||
| ./llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i | ||||
| ``` | ||||
|  | ||||
| ### Android | ||||
|  | ||||
| #### Build on Android device using Termux | ||||
| We found that building directly on the Android device gives better runtime performance, so we recommend building on the device. | ||||
|  | ||||
| [Termux](https://github.com/termux/termux-app#installation) is a terminal app for Android devices (no root required). | ||||
|  | ||||
| Install tools in Termux: | ||||
| ``` | ||||
| apt update && apt upgrade -y | ||||
| apt install git make cmake | ||||
| ``` | ||||
|  | ||||
| It's recommended to move your model into the `~/` directory for best performance: | ||||
| ``` | ||||
| cd storage/downloads | ||||
| mv model.gguf ~/ | ||||
| ``` | ||||
|  | ||||
| #### Building the Project using Android NDK | ||||
| Obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake. | ||||
|  | ||||
| Run the following commands on your computer to avoid downloading the NDK onto your phone. Alternatively, you can do this directly in Termux: | ||||
|  | ||||
| ```bash | ||||
| mkdir build-android | ||||
| cd build-android | ||||
| export NDK=/your_ndk_path | ||||
| cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod .. | ||||
| make | ||||
| ``` | ||||
|  | ||||
| Install [Termux](https://github.com/termux/termux-app#installation) on your device and run `termux-setup-storage` to get access to your SD card (on Android 11+ run the command twice). | ||||
|  | ||||
| Finally, copy the built `llama` binaries and the model files to your device storage. Because file permissions on the Android sdcard cannot be changed, copy the executables to `/data/data/com.termux/files/home/bin` and then run the following commands in Termux to make them executable: | ||||
|  | ||||
| (This assumes you have already pushed the built executables to the /sdcard/llama.cpp/bin path with `adb push`; a sketch of that push follows the commands below.) | ||||
| ``` | ||||
| $ cp -r /sdcard/llama.cpp/bin /data/data/com.termux/files/home/ | ||||
| $ cd /data/data/com.termux/files/home/bin | ||||
| $ chmod +x ./* | ||||
| ``` | ||||
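|  | ||||
| One possible way to do that push from the host, sketched with illustrative paths (assuming the NDK build above left its binaries in `build-android/bin`; adjust to your layout): | ||||
|  | ||||
| ```bash | ||||
| # illustrative only: push the cross-compiled binaries and the models to the device | ||||
| adb push build-android/bin /sdcard/llama.cpp/ | ||||
| adb push ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf /sdcard/llama.cpp/ | ||||
| adb push ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf /sdcard/llama.cpp/ | ||||
| ``` | ||||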
|  | ||||
| Download the models and push them to `/sdcard/llama.cpp/`, then move them to `/data/data/com.termux/files/home/model/`: | ||||
|  | ||||
| ``` | ||||
| $ mv /sdcard/llama.cpp/ggml-model-Q4_K_M.gguf /data/data/com.termux/files/home/model/ | ||||
| $ mv /sdcard/llama.cpp/mmproj-model-f16.gguf /data/data/com.termux/files/home/model/ | ||||
| ``` | ||||
|  | ||||
| Now, you can start chatting: | ||||
| ``` | ||||
| $ cd /data/data/com.termux/files/home/bin | ||||
| $ ./llama-minicpmv-cli -m ../model/ggml-model-Q4_K_M.gguf --mmproj ../model/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg  -p "What is in the image?" | ||||
| ./build/bin/llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg  -p "What is in the image?" | ||||
| ``` | ||||
|   | ||||
| @@ -4,13 +4,25 @@ | ||||
|  | ||||
| Download the [MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6) PyTorch model from Hugging Face into the "MiniCPM-V-2_6" folder. | ||||
|  | ||||
|  | ||||
| ### Build llama.cpp | ||||
| Readme last updated: 2025-02-06 | ||||
|  | ||||
| If the usage differs from what is described here, refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md). | ||||
|  | ||||
| Clone llama.cpp: | ||||
| ```bash | ||||
| git clone git@github.com:OpenBMB/llama.cpp.git | ||||
| git clone https://github.com/ggerganov/llama.cpp | ||||
| cd llama.cpp | ||||
| git checkout minicpmv-main | ||||
| ``` | ||||
|  | ||||
| Build llama.cpp using `CMake`: | ||||
| ```bash | ||||
| cmake -B build | ||||
| cmake --build build --config Release | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ### Usage of MiniCPM-V 2.6 | ||||
|  | ||||
| Convert the PyTorch model to GGUF files (you can also download the pre-converted [gguf](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) files we provide) | ||||
| @@ -21,87 +33,15 @@ python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM- | ||||
| python ./convert_hf_to_gguf.py ../MiniCPM-V-2_6/model | ||||
|  | ||||
| # quantize int4 version | ||||
| ./llama-quantize ../MiniCPM-V-2_6/model/ggml-model-f16.gguf ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M | ||||
| ./build/bin/llama-quantize ../MiniCPM-V-2_6/model/ggml-model-f16.gguf ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M | ||||
| ``` | ||||
|  | ||||
| Build for Linux or Mac | ||||
|  | ||||
| ```bash | ||||
| make | ||||
| make llama-minicpmv-cli | ||||
| ``` | ||||
|  | ||||
| Inference on Linux or Mac | ||||
| ``` | ||||
| ```bash | ||||
| # run f16 version | ||||
| ./llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" | ||||
| ./build/bin/llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?" | ||||
|  | ||||
| # run quantized int4 version | ||||
| ./llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg  -p "What is in the image?" | ||||
|  | ||||
| # or run in interactive mode | ||||
| ./llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i | ||||
| ``` | ||||
|  | ||||
| ### Video | ||||
| Install FFmpeg | ||||
| ``` | ||||
| brew install ffmpeg | ||||
| brew install pkg-config | ||||
| ``` | ||||
|  | ||||
| ### Android | ||||
|  | ||||
| #### Build on Android device using Termux | ||||
| We found that building directly on the Android device gives better runtime performance, so we recommend building on the device. | ||||
|  | ||||
| [Termux](https://github.com/termux/termux-app#installation) is a terminal app for Android devices (no root required). | ||||
|  | ||||
| Install tools in Termux: | ||||
| ``` | ||||
| apt update && apt upgrade -y | ||||
| apt install git make cmake | ||||
| ``` | ||||
|  | ||||
| It's recommended to move your model into the `~/` directory for best performance: | ||||
| ``` | ||||
| cd storage/downloads | ||||
| mv model.gguf ~/ | ||||
| ``` | ||||
|  | ||||
| #### Building the Project using Android NDK | ||||
| Obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake. | ||||
|  | ||||
| Run the following commands on your computer to avoid downloading the NDK onto your phone. Alternatively, you can do this directly in Termux: | ||||
|  | ||||
| ```bash | ||||
| mkdir build-android | ||||
| cd build-android | ||||
| export NDK=/your_ndk_path | ||||
| cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod .. | ||||
| make | ||||
| ``` | ||||
|  | ||||
| Install [Termux](https://github.com/termux/termux-app#installation) on your device and run `termux-setup-storage` to get access to your SD card (on Android 11+ run the command twice). | ||||
|  | ||||
| Finally, copy the built `llama` binaries and the model files to your device storage. Because file permissions on the Android sdcard cannot be changed, copy the executables to `/data/data/com.termux/files/home/bin` and then run the following commands in Termux to make them executable: | ||||
|  | ||||
| (This assumes you have already pushed the built executables to the /sdcard/llama.cpp/bin path with `adb push`.) | ||||
| ``` | ||||
| $ cp -r /sdcard/llama.cpp/bin /data/data/com.termux/files/home/ | ||||
| $ cd /data/data/com.termux/files/home/bin | ||||
| $ chmod +x ./* | ||||
| ``` | ||||
|  | ||||
| Download the models and push them to `/sdcard/llama.cpp/`, then move them to `/data/data/com.termux/files/home/model/`: | ||||
|  | ||||
| ``` | ||||
| $ mv /sdcard/llama.cpp/ggml-model-Q4_K_M.gguf /data/data/com.termux/files/home/model/ | ||||
| $ mv /sdcard/llama.cpp/mmproj-model-f16.gguf /data/data/com.termux/files/home/model/ | ||||
| ``` | ||||
|  | ||||
| Now, you can start chatting: | ||||
| ``` | ||||
| $ cd /data/data/com.termux/files/home/bin | ||||
| $ ./llama-minicpmv-cli -m ../model/ggml-model-Q4_K_M.gguf --mmproj ../model/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg  -p "What is in the image?" | ||||
| ./build/bin/llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg  -p "What is in the image?" | ||||
| ``` | ||||
|   | ||||
| @@ -1378,6 +1378,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { | ||||
|             LOG_INF("%s: vision_encoder: %d\n", __func__, new_clip->has_vision_encoder); | ||||
|             LOG_INF("%s: llava_projector:  %d\n", __func__, new_clip->has_llava_projector); | ||||
|             LOG_INF("%s: minicpmv_projector:  %d\n", __func__, new_clip->has_minicpmv_projector); | ||||
|             LOG_INF("%s: minicpmv_version:  %d\n", __func__, new_clip->minicpmv_version); | ||||
|             LOG_INF("%s: glm_projector:  %d\n", __func__, new_clip->has_glm_projector); | ||||
|             LOG_INF("%s: model size:     %.2f MB\n", __func__, model_size / 1024.0 / 1024.0); | ||||
|             LOG_INF("%s: metadata size:  %.2f MB\n", __func__, ggml_get_mem_size(meta) / 1024.0 / 1024.0); | ||||
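|  | ||||
| With this change, `clip_model_load` also reports the detected `minicpmv_version` next to the other projector flags (from the format string above, output along the lines of `clip_model_load: minicpmv_version:  3`), which makes it easier to spot a projector file that was converted with the wrong version. | ||||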
|   | ||||
| @@ -148,6 +148,7 @@ static void process_image(struct llava_context * ctx_llava, struct llava_image_e | ||||
|     process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++); | ||||
|     eval_string(ctx_llava->ctx_llama, std::string("</image>").c_str(), params->n_batch, &n_past, false); | ||||
|     if (num_image_embeds > 1) { | ||||
|         if (has_minicpmv_projector == 2) { | ||||
|             size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip); | ||||
|             eval_string(ctx_llava->ctx_llama, std::string("<slice>").c_str(), params->n_batch, &n_past, false); | ||||
|             for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) { | ||||
| @@ -162,6 +163,20 @@ static void process_image(struct llava_context * ctx_llava, struct llava_image_e | ||||
|             } | ||||
|             eval_string(ctx_llava->ctx_llama, std::string("</slice>").c_str(), params->n_batch, &n_past, false); | ||||
|         } | ||||
|         else if (has_minicpmv_projector == 3 || has_minicpmv_projector == 4) { | ||||
|             size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip); | ||||
|             for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) { | ||||
|                 for (size_t j = 0; j < num_image_embeds_col; ++j) { | ||||
|                     eval_string(ctx_llava->ctx_llama, std::string("<slice>").c_str(), params->n_batch, &n_past, false); | ||||
|                     process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++); | ||||
|                     eval_string(ctx_llava->ctx_llama, std::string("</slice>").c_str(), params->n_batch, &n_past, false); | ||||
|                     if (j == num_image_embeds_col - 1) { | ||||
|                         eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     LOG_INF("%s: image token past: %d\n", __func__, n_past); | ||||
| } | ||||
|  | ||||
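| In effect, for `minicpmv_projector` versions 3 and 4 every tile now gets its own `<slice>...</slice>` wrapper, with a newline appended after the last tile of each row, whereas the version 2 branch keeps the whole grid inside a single `<slice>...</slice>` pair. As a worked case (not taken from the diff): with one overview embed plus a 2x2 grid of tiles (`num_image_embeds = 5`, `num_image_embeds_col = 2`), the image portion of the prompt becomes the overview followed by `<slice>t1</slice><slice>t2</slice>\n<slice>t3</slice><slice>t4</slice>\n`. | ||||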
|   | ||||
| @@ -597,7 +597,6 @@ elif args.minicpmv_projector is not None: | ||||
|     fname_middle = "mmproj-" | ||||
|     has_text_encoder = False | ||||
|     has_minicpmv_projector = True | ||||
|     minicpmv_version = 4 | ||||
| elif args.vision_only: | ||||
|     fname_middle = "vision-" | ||||
|     has_text_encoder = False | ||||
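|  | ||||
| The deleted assignment is presumably the bug referred to in the commit title: the `minicpmv_projector` branch unconditionally forced `minicpmv_version = 4` (apparently the MiniCPM-o 2.6 value), overriding whatever version was requested, so projector files for the older MiniCPM-V models could be written with the wrong version tag. With the hardcode removed, the version should come from the converter's command-line option instead. A hedged sketch of such an invocation, with the flag spellings and paths taken as assumptions rather than from this diff: | ||||
|  | ||||
| ```bash | ||||
| # assumed flag names and paths; check the converter's --help for the real interface | ||||
| python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py \ | ||||
|     -m ../MiniCPM-V-2_6 \ | ||||
|     --minicpmv-projector ../MiniCPM-V-2_6/minicpmv.projector \ | ||||
|     --output-dir ../MiniCPM-V-2_6/ \ | ||||
|     --minicpmv_version 3 | ||||
| ``` | ||||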
|   | ||||