mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
	move BLAS to a separate backend (#6210)
* move BLAS to a separate backend
* rename GGML_USE_OPENBLAS to GGML_USE_BLAS
* alloc : reuse the same buffer when the same buffer type is used multiple times
* set number of threads automatically for openblas and blis
* sched : print assignments when the GGML_SCHED_DEBUG env variable is set
* sched : allow ops with weights on an incompatible buffer type

  This will cause the weight to be copied to a backend that supports the op,
  which is very costly. The weight should have been stored in a buffer of a
  backend that can run the op, but llama.cpp cannot do this automatically at
  the moment.

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
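Two of the sched bullets above describe runtime behaviour: assignments are printed when the GGML_SCHED_DEBUG environment variable is set, and an op whose weight lives in an incompatible buffer type is still scheduled, at the cost of copying the weight first. A minimal, self-contained sketch of that decision is below; the toy_* names are hypothetical stand-ins, not the real ggml scheduler types.

/* Sketch only: toy_backend, toy_buffer_type and toy_schedule_op are
 * hypothetical stand-ins, not ggml types. It mimics two of the bullets
 * above: assignments are printed only when GGML_SCHED_DEBUG is set, and
 * a weight in an incompatible buffer type forces a (slow) copy. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct { const char *name; } toy_backend;
typedef struct { const char *name; } toy_buffer_type;

/* toy rule: a backend can only read buffers of its own kind */
static bool toy_supports_buft(const toy_backend *be, const toy_buffer_type *buft) {
    return strcmp(be->name, buft->name) == 0;
}

static void toy_schedule_op(const toy_backend *be, const char *op,
                            const toy_buffer_type *weight_buft) {
    bool needs_copy = !toy_supports_buft(be, weight_buft);
    if (getenv("GGML_SCHED_DEBUG") != NULL) {
        printf("%s -> %s%s\n", op, be->name,
               needs_copy ? " (weight copied: slow path)" : "");
    }
    /* real work would happen here; the weight copy is the costly part */
}

int main(void) {
    toy_backend gpu          = { "gpu" };
    toy_buffer_type gpu_buft = { "gpu" };
    toy_buffer_type cpu_buft = { "cpu" };

    toy_schedule_op(&gpu, "mul_mat(a)", &gpu_buft); /* weight usable in place */
    toy_schedule_op(&gpu, "mul_mat(b)", &cpu_buft); /* incompatible -> copy   */
    return 0;
}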
ggml-rpc.cpp (21 lines changed)
@@ -540,22 +540,12 @@ GGML_CALL static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend
     return ggml_nbytes(tensor);
 }
 
-GGML_CALL static bool ggml_backend_rpc_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
-    if (!ggml_backend_is_rpc(backend)) {
-        return false;
-    }
-    ggml_backend_rpc_buffer_type_context * buft_ctx = (ggml_backend_rpc_buffer_type_context *)buft->context;
-    ggml_backend_rpc_context * rpc_ctx = (ggml_backend_rpc_context *)backend->context;
-    return buft_ctx->endpoint == rpc_ctx->endpoint;
-}
-
 static ggml_backend_buffer_type_i ggml_backend_rpc_buffer_type_interface = {
     /* .get_name         = */ ggml_backend_rpc_buffer_type_name,
     /* .alloc_buffer     = */ ggml_backend_rpc_buffer_type_alloc_buffer,
     /* .get_alignment    = */ ggml_backend_rpc_buffer_type_get_alignment,
     /* .get_max_size     = */ ggml_backend_rpc_get_max_size,
     /* .get_alloc_size   = */ ggml_backend_rpc_buffer_type_get_alloc_size,
-    /* .supports_backend = */ ggml_backend_rpc_buffer_type_supports_backend,
     /* .is_host          = */ NULL,
 };
 
@@ -638,6 +628,15 @@ GGML_CALL static bool ggml_backend_rpc_supports_op(ggml_backend_t backend, const
     return false;
 }
 
+GGML_CALL static bool ggml_backend_rpc_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
+    if (buft->iface.get_name != ggml_backend_rpc_buffer_type_name) {
+        return false;
+    }
+    ggml_backend_rpc_buffer_type_context * buft_ctx = (ggml_backend_rpc_buffer_type_context *)buft->context;
+    ggml_backend_rpc_context * rpc_ctx = (ggml_backend_rpc_context *)backend->context;
+    return buft_ctx->endpoint == rpc_ctx->endpoint;
+}
+
 static ggml_backend_i ggml_backend_rpc_interface = {
     /* .get_name                = */ ggml_backend_rpc_name,
     /* .free                    = */ ggml_backend_rpc_free,
@@ -648,9 +647,11 @@ static ggml_backend_i ggml_backend_rpc_interface = {
     /* .synchronize             = */ ggml_backend_rpc_synchronize,
     /* .graph_plan_create       = */ NULL,
     /* .graph_plan_free         = */ NULL,
+    /* .graph_plan_update       = */ NULL,
     /* .graph_plan_compute      = */ NULL,
     /* .graph_compute           = */ ggml_backend_rpc_graph_compute,
     /* .supports_op             = */ ggml_backend_rpc_supports_op,
+    /* .supports_buft           = */ ggml_backend_rpc_supports_buft,
     /* .offload_op              = */ NULL,
     /* .event_new               = */ NULL,
     /* .event_free              = */ NULL,
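The diff swaps the query direction for the RPC backend: the buffer-type-side supports_backend callback goes away, and the backend itself now answers a supports_buft query by matching the buffer type to its own endpoint. A rough, self-contained sketch of how such a callback might be wired through an interface struct is below; the toy_* types and the endpoint strings are illustrative stand-ins, not the real ggml_backend_i definition.

/* Sketch only: simplified stand-ins, not the real ggml_backend_i /
 * ggml_backend_buffer_type_i layouts. Shows the new call direction:
 * the caller asks the backend about a buffer type (supports_buft)
 * instead of asking the buffer type about a backend (supports_backend). */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

typedef struct toy_buffer_type {
    const char *name;                       /* e.g. "RPC[localhost:50052]" */
} toy_buffer_type;

typedef struct toy_backend toy_backend;

typedef struct toy_backend_iface {
    /* per-backend capability query, analogous to .supports_buft */
    bool (*supports_buft)(toy_backend *be, toy_buffer_type *buft);
} toy_backend_iface;

struct toy_backend {
    toy_backend_iface iface;
    const char       *endpoint;             /* e.g. "localhost:50052" */
};

/* toy implementation: accept only buffer types naming the same endpoint */
static bool toy_rpc_supports_buft(toy_backend *be, toy_buffer_type *buft) {
    return strstr(buft->name, be->endpoint) != NULL;
}

int main(void) {
    toy_backend rpc       = { { toy_rpc_supports_buft }, "localhost:50052" };
    toy_buffer_type same  = { "RPC[localhost:50052]" };
    toy_buffer_type other = { "RPC[otherhost:50052]" };

    /* a scheduler-like caller goes through the backend interface */
    printf("same endpoint:  %d\n", rpc.iface.supports_buft(&rpc, &same));  /* 1 */
    printf("other endpoint: %d\n", rpc.iface.supports_buft(&rpc, &other)); /* 0 */
    return 0;
}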
Author: slaren