	cuda : increase max block size to 1024
@@ -443,7 +443,7 @@ static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_
 #define CUDA_SCALE_BLOCK_SIZE 256
 #define CUDA_CLAMP_BLOCK_SIZE 256
 #define CUDA_ROPE_BLOCK_SIZE 256
-#define CUDA_SOFT_MAX_BLOCK_SIZE 512
+#define CUDA_SOFT_MAX_BLOCK_SIZE 1024
 #define CUDA_ALIBI_BLOCK_SIZE 32
 #define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32
 #define CUDA_QUANTIZE_BLOCK_SIZE 256
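For context: CUDA_SOFT_MAX_BLOCK_SIZE caps the number of threads per block used when launching the soft-max kernel, and 1024 is the hardware maximum threads per block on GPUs of compute capability 2.0 and newer, so the constant cannot usefully go higher. The sketch below is a minimal illustration of how such a constant typically bounds a launch configuration; the kernel body, the shared-memory reduction scheme, and the helper name soft_max_f32_cuda are assumptions for illustration, not the actual llama.cpp implementation.

// Hypothetical, simplified soft-max: one block per row, with a
// block-wide max/sum reduction in shared memory. Illustrates how
// CUDA_SOFT_MAX_BLOCK_SIZE bounds the launch; NOT the llama.cpp kernel.
#include <cuda_runtime.h>
#include <math.h>

#define CUDA_SOFT_MAX_BLOCK_SIZE 1024 // max threads/block on cc >= 2.0

__global__ void soft_max_f32(const float * x, float * dst, const int ncols) {
    const int row = blockIdx.x;
    const int tid = threadIdx.x;

    extern __shared__ float buf[];

    // 1) block-wide max over the row (strided loop covers ncols > blockDim.x)
    float max_val = -INFINITY;
    for (int col = tid; col < ncols; col += blockDim.x) {
        max_val = fmaxf(max_val, x[row*ncols + col]);
    }
    buf[tid] = max_val;
    __syncthreads();
    for (int s = blockDim.x/2; s > 0; s >>= 1) { // blockDim.x is a power of two
        if (tid < s) buf[tid] = fmaxf(buf[tid], buf[tid + s]);
        __syncthreads();
    }
    max_val = buf[0];
    __syncthreads();

    // 2) exponentiate (shifted by the max for stability) and sum
    float sum = 0.0f;
    for (int col = tid; col < ncols; col += blockDim.x) {
        const float val = expf(x[row*ncols + col] - max_val);
        dst[row*ncols + col] = val;
        sum += val;
    }
    buf[tid] = sum;
    __syncthreads();
    for (int s = blockDim.x/2; s > 0; s >>= 1) {
        if (tid < s) buf[tid] += buf[tid + s];
        __syncthreads();
    }
    sum = buf[0];

    // 3) normalize
    for (int col = tid; col < ncols; col += blockDim.x) {
        dst[row*ncols + col] /= sum;
    }
}

static void soft_max_f32_cuda(const float * x, float * dst,
                              const int ncols, const int nrows,
                              cudaStream_t stream) {
    // One thread per column, rounded up to a power of two and capped at
    // CUDA_SOFT_MAX_BLOCK_SIZE; longer rows use the strided loops above.
    int block_size = 1;
    while (block_size < ncols && block_size < CUDA_SOFT_MAX_BLOCK_SIZE) {
        block_size *= 2;
    }
    const dim3 block_dims(block_size, 1, 1);
    const dim3 block_nums(nrows, 1, 1);
    const size_t shmem = block_size*sizeof(float);
    soft_max_f32<<<block_nums, block_dims, shmem, stream>>>(x, dst, ncols);
}

Under this sketch's assumptions, raising the cap from 512 to 1024 lets rows of up to 1024 elements run with one thread per element in a single block, instead of falling back to the strided loops at half that width.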