commit f578b86b21
* move BLAS to a separate backend
* rename GGML_USE_OPENBLAS to GGML_USE_BLAS
* alloc : reuse the same buffer when the same buffer type is used multiple times
* set the number of threads automatically for openblas and blis
* sched : print assignments when the GGML_SCHED_DEBUG env variable is set
* sched : allow ops with weights on an incompatible buffer type

  This will cause the weight to be copied to a backend that supports the op, which is very costly. The weight should have been stored in a buffer of a backend that can run the op, but llama.cpp cannot do this automatically at the moment.

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
24 lines | 526 B | C
#pragma once

#include "ggml.h"
#include "ggml-backend.h"


#ifdef  __cplusplus
extern "C" {
#endif

// backend API
GGML_API GGML_CALL ggml_backend_t ggml_backend_blas_init(void);

GGML_API GGML_CALL bool ggml_backend_is_blas(ggml_backend_t backend);

// number of threads used for conversion to float
// for openblas and blis, this will also set the number of threads used for blas operations
GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);


#ifdef  __cplusplus
}
#endif
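
For context, a minimal usage sketch of the API declared above. This is an illustration, not code from the commit: the "ggml-blas.h" include name and the thread count of 4 are assumptions, and ggml_backend_free comes from the ggml-backend.h API that the header already includes.

// Minimal usage sketch (not from the commit): initialize the BLAS backend,
// configure its thread count, and release it.
// Assumes the header shown above is available as "ggml-blas.h".
#include <stdio.h>
#include "ggml-blas.h"

int main(void) {
    ggml_backend_t backend = ggml_backend_blas_init();
    if (backend == NULL) {
        fprintf(stderr, "failed to initialize the BLAS backend\n");
        return 1;
    }

    if (ggml_backend_is_blas(backend)) {
        // threads used for conversion to float; with openblas and blis this
        // also sets the threads used for the BLAS operations themselves
        ggml_backend_blas_set_n_threads(backend, 4); // 4 is an arbitrary example
    }

    // ... build a ggml graph and run it with ggml_backend_graph_compute() ...

    ggml_backend_free(backend); // from ggml-backend.h
    return 0;
}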