mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	llava: Fix cpu-only clip image encoding sefault (#12907)
* llava: Fix cpu-only clip image encoding * clip : no smart ptr for ggml_backend_t * Fix for backend_ptr push_back --------- Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
This commit is contained in:
		| @@ -323,8 +323,8 @@ struct clip_ctx { | ||||
|     std::vector<ggml_backend_t> backend_ptrs; | ||||
|     std::vector<ggml_backend_buffer_type_t> backend_buft; | ||||
|  | ||||
|     ggml_backend_ptr backend; | ||||
|     ggml_backend_ptr backend_cpu; | ||||
|     ggml_backend_t backend; | ||||
|     ggml_backend_t backend_cpu; | ||||
|     ggml_backend_buffer_ptr buf; | ||||
|  | ||||
|     ggml_backend_sched_ptr sched; | ||||
| @@ -332,27 +332,34 @@ struct clip_ctx { | ||||
|     clip_image_size load_image_size; | ||||
|  | ||||
|     clip_ctx(clip_context_params & ctx_params) { | ||||
|         backend_cpu = ggml_backend_ptr(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr)); | ||||
|         backend     = ggml_backend_ptr(ctx_params.use_gpu | ||||
|         backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr); | ||||
|         backend     = ctx_params.use_gpu | ||||
|                         ? ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr) | ||||
|                         : nullptr); | ||||
|                         : nullptr; | ||||
|  | ||||
|         if (backend) { | ||||
|             LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend.get())); | ||||
|             backend_ptrs.push_back(backend.get()); | ||||
|             backend_buft.push_back(ggml_backend_get_default_buffer_type(backend.get())); | ||||
|             LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend)); | ||||
|             backend_ptrs.push_back(backend); | ||||
|             backend_buft.push_back(ggml_backend_get_default_buffer_type(backend)); | ||||
|         } else { | ||||
|             backend = std::move(backend_cpu); | ||||
|             backend = backend_cpu; | ||||
|             LOG_INF("%s: CLIP using CPU backend\n", __func__); | ||||
|         } | ||||
|  | ||||
|         backend_ptrs.push_back(backend_cpu.get()); | ||||
|         backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu.get())); | ||||
|         backend_ptrs.push_back(backend_cpu); | ||||
|         backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu)); | ||||
|  | ||||
|         sched.reset( | ||||
|             ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), 8192, false) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     ~clip_ctx() { | ||||
|         ggml_backend_free(backend); | ||||
|         if (backend != backend_cpu) { | ||||
|             ggml_backend_free(backend_cpu); | ||||
|         } | ||||
|     } | ||||
| }; | ||||
|  | ||||
| static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_image_f32_batch & imgs) { | ||||
| @@ -1428,7 +1435,7 @@ struct clip_model_loader { | ||||
|             } | ||||
|  | ||||
|             // alloc memory and offload data | ||||
|             ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(ctx_clip.backend.get()); | ||||
|             ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(ctx_clip.backend); | ||||
|             ctx_clip.buf.reset(ggml_backend_alloc_ctx_tensors_from_buft(ctx_clip.ctx_data.get(), buft)); | ||||
|             ggml_backend_buffer_set_usage(ctx_clip.buf.get(), GGML_BACKEND_BUFFER_USAGE_WEIGHTS); | ||||
|             for (auto & t : tensors_to_load) { | ||||
| @@ -2610,7 +2617,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     ggml_backend_cpu_set_n_threads(ctx->backend_cpu.get(), n_threads); | ||||
|     ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads); | ||||
|  | ||||
|     auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf); | ||||
|     if (status != GGML_STATUS_SUCCESS) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Matt Clayton
					Matt Clayton