mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-28 08:31:25 +00:00 
			
		
		
		
	server : support llava 1.6 (#5553)
* server: init working 1.6
* move clip_image to header
* remove commented code
* remove c++ style from header
* remove todo
* expose llava_image_embed_make_with_clip_img
* fix zig build
This commit is contained in:
		
							
								
								
									
										2
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Makefile
									
									
									
									
									
								
							| @@ -719,7 +719,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C | |||||||
| 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) | 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) | ||||||
| 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) | 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) | ||||||
|  |  | ||||||
| server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) | server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h examples/llava/llava.h examples/llava/llava.cpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) | ||||||
| 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) | 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) | ||||||
| 	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual | 	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual | ||||||
| 	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2) | 	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2) | ||||||
|   | |||||||
| @@ -123,6 +123,7 @@ pub fn build(b: *std.build.Builder) !void { | |||||||
|     const grammar_parser = make.obj("grammar-parser", "common/grammar-parser.cpp"); |     const grammar_parser = make.obj("grammar-parser", "common/grammar-parser.cpp"); | ||||||
|     const train = make.obj("train", "common/train.cpp"); |     const train = make.obj("train", "common/train.cpp"); | ||||||
|     const clip = make.obj("clip", "examples/llava/clip.cpp"); |     const clip = make.obj("clip", "examples/llava/clip.cpp"); | ||||||
|  |     const llava = make.obj("llava", "examples/llava/llava.cpp"); | ||||||
|  |  | ||||||
|     _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, console, grammar_parser }); |     _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, console, grammar_parser }); | ||||||
|     _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo }); |     _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo }); | ||||||
| @@ -131,7 +132,7 @@ pub fn build(b: *std.build.Builder) !void { | |||||||
|     _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train }); |     _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train }); | ||||||
|     _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train }); |     _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train }); | ||||||
|  |  | ||||||
|     const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip }); |     const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip, llava }); | ||||||
|     if (server.target.isWindows()) { |     if (server.target.isWindows()) { | ||||||
|         server.linkSystemLibrary("ws2_32"); |         server.linkSystemLibrary("ws2_32"); | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -311,7 +311,7 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * | |||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
| static bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) { | bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) { | ||||||
|     float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*6); // TODO: base on gridsize/llava model |     float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*6); // TODO: base on gridsize/llava model | ||||||
|     if (!image_embd) { |     if (!image_embd) { | ||||||
|         fprintf(stderr, "Unable to allocate memory for image embeddings\n"); |         fprintf(stderr, "Unable to allocate memory for image embeddings\n"); | ||||||
|   | |||||||
| @@ -31,6 +31,8 @@ struct llava_image_embed { | |||||||
| /** sanity check for clip <-> llava embed size match */ | /** sanity check for clip <-> llava embed size match */ | ||||||
| LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip); | LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip); | ||||||
|  |  | ||||||
|  | LLAVA_API bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out); | ||||||
|  |  | ||||||
| /** build an image embed from image file bytes */ | /** build an image embed from image file bytes */ | ||||||
| LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length); | LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length); | ||||||
| /** build an image embed from a path to an image filename */ | /** build an image embed from a path to an image filename */ | ||||||
|   | |||||||
| @@ -5,6 +5,7 @@ | |||||||
| #include "oai.hpp" | #include "oai.hpp" | ||||||
|  |  | ||||||
| #include "../llava/clip.h" | #include "../llava/clip.h" | ||||||
|  | #include "../llava/llava.h" | ||||||
|  |  | ||||||
| #include "stb_image.h" | #include "stb_image.h" | ||||||
|  |  | ||||||
| @@ -997,43 +998,12 @@ struct llama_server_context | |||||||
|             { |             { | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             clip_image_f32_batch img_res_v; |  | ||||||
|             img_res_v.size = 0; |             if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) { | ||||||
|             img_res_v.data = nullptr; |  | ||||||
|             if (!clip_image_preprocess(clp_ctx, img.img_data, img_res_v)) |  | ||||||
|             { |  | ||||||
|                 LOG_TEE("Error processing the given image"); |  | ||||||
|                 clip_free(clp_ctx); |  | ||||||
|                 clip_image_f32_batch_free(img_res_v); |  | ||||||
|                 return false; |  | ||||||
|             } |  | ||||||
|             if (img_res_v.size == 0) |  | ||||||
|             { |  | ||||||
|                 LOG_TEE("Error processing the given image"); |                 LOG_TEE("Error processing the given image"); | ||||||
|                 return false; |                 return false; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // note: assumes only one image was returned by clip_image_preprocess |  | ||||||
|             clip_image_f32 * img_res = img_res_v.data; |  | ||||||
|  |  | ||||||
|             img.image_tokens = clip_n_patches(clp_ctx); |  | ||||||
|             img.image_embedding = (float *)malloc(clip_embd_nbytes(clp_ctx)); |  | ||||||
|             if (!img.image_embedding) |  | ||||||
|             { |  | ||||||
|                 LOG_TEE("Unable to allocate memory for image embeddings\n"); |  | ||||||
|                 clip_image_f32_batch_free(img_res_v); |  | ||||||
|                 clip_free(clp_ctx); |  | ||||||
|                 return false; |  | ||||||
|             } |  | ||||||
|             LOG_TEE("slot %i - encoding image [id: %i]\n", slot.id, img.id); |  | ||||||
|             if (!clip_image_encode(clp_ctx, params.n_threads, img_res, img.image_embedding)) |  | ||||||
|             { |  | ||||||
|                 LOG_TEE("Unable to encode image\n"); |  | ||||||
|                 clip_image_f32_batch_free(img_res_v); |  | ||||||
|                 return false; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             clip_image_f32_batch_free(img_res_v); |  | ||||||
|  |  | ||||||
|             img.request_encode_image = false; |             img.request_encode_image = false; | ||||||
|         } |         } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 CJ Pais
					CJ Pais