	CUDA: remove incorrect precision check (#7454)
@@ -286,9 +286,6 @@ void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_ten
     const ggml_tensor * KQV = dst;
     const ggml_tensor * Q   = dst->src[0];
 
-    const int32_t precision = KQV->op_params[2];
-    GGML_ASSERT(precision == GGML_PREC_DEFAULT);
-
     if (Q->ne[1] <= 16) {
         constexpr int cols_per_block = 16;
         constexpr int parallel_blocks = 4;
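For readers skimming the diff, the sketch below mimics the dispatch logic in standalone C++ so it can be compiled and run on its own. The ggml_tensor and ggml_prec definitions here are simplified stand-ins rather than the real ggml.h declarations, and the rationale in the comments (the f32 tile kernel accumulates in FP32 regardless of the requested precision, so rejecting anything other than GGML_PREC_DEFAULT was overly strict) is an inference from the commit title, not something spelled out in the patch itself.

#include <cstdint>
#include <cstdio>

// Simplified stand-ins for the ggml types involved; the real definitions
// live in ggml.h and carry many more fields.
enum ggml_prec { GGML_PREC_DEFAULT, GGML_PREC_F32 };

struct ggml_tensor {
    int32_t       op_params[16]; // per-op parameters; index 2 holds the requested precision
    ggml_tensor * src[2];        // source tensors (src[0] = Q in this sketch)
    int64_t       ne[4];         // tensor dimensions
};

// Mimics the entry point touched by the diff: the requested precision is still
// read from op_params[2], but values other than GGML_PREC_DEFAULT are no longer
// rejected, since the f32 tile kernel computes in FP32 either way (assumption
// based on the commit title).
void flash_attn_ext_tile_f32(ggml_tensor * dst) {
    const ggml_tensor * KQV = dst;
    const ggml_tensor * Q   = dst->src[0];

    const int32_t precision = KQV->op_params[2];
    // Removed by this commit: GGML_ASSERT(precision == GGML_PREC_DEFAULT);

    if (Q->ne[1] <= 16) {
        printf("small-batch path, precision = %d\n", precision);
    } else {
        printf("large-batch path, precision = %d\n", precision);
    }
}

int main() {
    ggml_tensor q = {};
    q.ne[1] = 8; // batch of 8 query columns -> small-batch path

    ggml_tensor kqv = {};
    kqv.src[0]       = &q;
    kqv.op_params[2] = GGML_PREC_F32; // would have tripped the old assert

    flash_attn_ext_tile_f32(&kqv);
    return 0;
}

Built with e.g. g++ -std=c++11 sketch.cpp, the GGML_PREC_F32 request now takes the small-batch path; with the old assert in place it would have aborted instead.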