mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	* move BLAS to a separate backend * rename GGML_USE_OPENBLAS to GGML_USE_BLAS * alloc : reuse same buffer when the same buffer type if used multiple times * set number of threads automatically for openblas and blis * sched : print assignments when GGML_SCHED_DEBUG env variable is set * sched : allow ops with weights on an incompatible buffer type This will cause the weight to be copied to a backend that supports the op, which is very costly. The weight should have been stored in a buffer of a backend that can run the op, but llama.cpp cannot do this automatically at the moment. --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
		
			
				
	
	
		
			24 lines
		
	
	
		
			526 B
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			24 lines
		
	
	
		
			526 B
		
	
	
	
		
			C
		
	
	
	
	
	
#pragma once
 | 
						|
 | 
						|
#include "ggml.h"
 | 
						|
#include "ggml-backend.h"
 | 
						|
 | 
						|
 | 
						|
#ifdef  __cplusplus
 | 
						|
extern "C" {
 | 
						|
#endif
 | 
						|
 | 
						|
// backend API
 | 
						|
GGML_API GGML_CALL ggml_backend_t ggml_backend_blas_init(void);
 | 
						|
 | 
						|
GGML_API GGML_CALL bool ggml_backend_is_blas(ggml_backend_t backend);
 | 
						|
 | 
						|
// number of threads used for conversion to float
 | 
						|
// for openblas and blis, this will also set the number of threads used for blas operations
 | 
						|
GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
 | 
						|
 | 
						|
 | 
						|
#ifdef  __cplusplus
 | 
						|
}
 | 
						|
#endif
 |