	SYCL: Add gated linear attention kernel (#11175)

* SYCL: Add Gated Linear attention kernel
* gla.hpp: add a space at the end of file
* gla: Put the barrier inside the main logic loop
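
The barrier note in the third bullet is a group-synchronization correctness point: a SYCL work-group barrier must be reached uniformly by every work-item, and each time step's shared state has to be visible group-wide before the next step begins, so the barrier belongs inside the sequential loop over time steps rather than before or after it. The sketch below illustrates only that pattern; it is not the committed kernel, and every name, shape, and the toy reduction in it are hypothetical.

#include <sycl/sycl.hpp>

// Illustrative gated scan. Each step stages data in local memory that every
// work-item reads, so both barriers must sit inside the per-time-step loop.
static void gated_scan_sketch(sycl::queue & q, float * out, const float * k,
                              const float * g, int n_steps, int dim) {
    q.submit([&](sycl::handler & cgh) {
        sycl::local_accessor<float, 1> kt(sycl::range<1>(dim), cgh);
        cgh.parallel_for(
            sycl::nd_range<1>(sycl::range<1>(dim), sycl::range<1>(dim)),
            [=](sycl::nd_item<1> it) {
                const int i = it.get_local_id(0);
                float s = 0.0f; // this work-item's slice of the recurrent state
                for (int t = 0; t < n_steps; ++t) {
                    kt[i] = k[t * dim + i]; // stage step t's key in local memory
                    // every work-item must see the fully staged key before reading it
                    it.barrier(sycl::access::fence_space::local_space);
                    float dot = 0.0f;
                    for (int j = 0; j < dim; ++j) {
                        dot += kt[j]; // toy reduction that reads every staged element
                    }
                    s = s * g[t * dim + i] + dot; // gated update of the state
                    // and none may overwrite kt[] until all have read it
                    it.barrier(sycl::access::fence_space::local_space);
                }
                out[i] = s;
            });
    }).wait();
}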

@@ -4040,6 +4040,9 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
         case GGML_OP_RWKV_WKV6:
             ggml_sycl_op_rwkv_wkv6(ctx, dst);
             break;
+        case GGML_OP_GATED_LINEAR_ATTN:
+            ggml_sycl_op_gated_linear_attn(ctx, dst);
+            break;
         default:
             return false;
     }
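
The first hunk above wires the new op into ggml_sycl_compute_forward, which switches on the destination tensor's op, runs the matching SYCL kernel, and returns false from the default branch so unsupported ops can fall back to another backend. The hunk does not show the declaration of the new entry point; assuming it mirrors the neighbouring ggml_sycl_op_rwkv_wkv6 (and, per the commit message, lives in gla.hpp), it would be:

// assumed prototype, modelled on ggml_sycl_op_rwkv_wkv6 beside it in the dispatch
void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);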

@@ -4507,6 +4510,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_OP_LEAKY_RELU:
         case GGML_OP_TIMESTEP_EMBEDDING:
         case GGML_OP_RWKV_WKV6:
+        case GGML_OP_GATED_LINEAR_ATTN:
             return true;
         default:
             return false;
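
The second hunk registers the op in the backend's supports_op hook, which is how ggml's scheduler decides whether a graph node may be placed on the SYCL device at all. A minimal usage sketch through the public device API in ggml-backend.h, assuming dev is a SYCL device and node is a graph tensor whose op is GGML_OP_GATED_LINEAR_ATTN:

#include "ggml.h"
#include "ggml-backend.h"

// after this change, a SYCL device reports support for the gated linear attention op
static bool can_offload(ggml_backend_dev_t dev, const struct ggml_tensor * node) {
    return ggml_backend_dev_supports_op(dev, node);
}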