	SYCL: Refactor ggml_sycl_compute_forward (#11121)
* SYCL: refactor ggml_sycl_compute_forward
* SYCL: add back GGML_UNUSED(dst) to ggml_sycl_cpy
* SYCL: add function name to noop debug
* SYCL: some device info print refactoring and add details of XMX availability
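The core change is a calling-convention refactor: the per-op entry points drop their explicit src0/src1 parameters and read the operands from dst->src[] instead, so the dispatcher only hands over the destination tensor. A minimal standalone C++ sketch of the pattern follows; the tensor struct and op names here are simplified placeholders, not the real ggml_tensor or SYCL backend types.

// Sketch of the signature change: ops formerly took (src0, src1, dst);
// after the refactor they take only dst and read operands from dst->src[].
#include <cstdio>

struct tensor {
    const tensor * src[2] = { nullptr, nullptr };   // operand slots, like ggml_tensor::src
    float          value  = 0.0f;
};

// Old-style entry point: the caller threads both sources through explicitly.
static void op_add_old(const tensor * src0, const tensor * src1, tensor * dst) {
    dst->value = src0->value + src1->value;
}

// New-style entry point: only the destination node is passed in.
static void op_add_new(tensor * dst) {
    dst->value = dst->src[0]->value + dst->src[1]->value;
}

int main() {
    tensor a; a.value = 1.0f;
    tensor b; b.value = 2.0f;
    tensor out;
    out.src[0] = &a;
    out.src[1] = &b;

    op_add_old(&a, &b, &out);   // before the refactor
    op_add_new(&out);           // after the refactor
    std::printf("%g\n", out.value);
}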
@@ -51,6 +51,10 @@ void ggml_sycl_host_free(void* ptr) try {
  std::exit(1);
}

bool gpu_has_xmx(sycl::device &dev) {
    return dev.has(sycl::aspect::ext_intel_matrix);
}

int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size) {
  const int64_t max_range = std::numeric_limits<int>::max();
  int64_t sycl_down_blk_size = block_size;

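The new gpu_has_xmx() helper keys off the DPC++ sycl::aspect::ext_intel_matrix aspect, which is also what the reworked device-info table reports. A small standalone query of the same aspect (a sketch assuming a oneAPI DPC++ toolchain; device enumeration itself is standard SYCL 2020):

// Report which devices expose XMX the same way gpu_has_xmx() does.
#include <sycl/sycl.hpp>
#include <iostream>

int main() {
    for (const sycl::device &dev : sycl::device::get_devices()) {
        const bool has_xmx = dev.has(sycl::aspect::ext_intel_matrix);
        std::cout << dev.get_info<sycl::info::device::name>()
                  << ": XMX " << (has_xmx ? "yes" : "no") << '\n';
    }
    return 0;
}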
@@ -662,6 +662,7 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
    }
}

bool gpu_has_xmx(sycl::device &dev);

void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                 const ggml_tensor *src1, ggml_tensor *dst,

@@ -158,8 +158,9 @@ static void concat_f32_sycl_non_cont(
      });
}

void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                const ggml_tensor *src1, ggml_tensor *dst) {
void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
  const ggml_tensor *src0 = dst->src[0];
  const ggml_tensor *src1 = dst->src[1];
  queue_ptr stream = ctx.stream();

  const int32_t dim = ((int32_t *)dst->op_params)[0];

@@ -15,7 +15,6 @@

#include "common.hpp"

void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                const ggml_tensor *src1, ggml_tensor *dst);
void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst);

#endif // GGML_SYCL_CONCAT_HPP

@@ -71,8 +71,9 @@ static void conv_transpose_1d_f32_f32_sycl(
        });
}

void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
    const ggml_tensor *src1, ggml_tensor *dst) {
void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
    const ggml_tensor *src0 = dst->src[0];
    const ggml_tensor *src1 = dst->src[1];
    const float * src0_d = (const float *)src0->data;
    const float * src1_d = (const float *)src1->data;

@@ -15,7 +15,6 @@

#include "common.hpp"

void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
  const ggml_tensor *src1, ggml_tensor *dst);
void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst);

#endif // GGML_SYCL_CONV_HPP

@@ -882,149 +882,149 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor
}


void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqrt);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqrt);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sin);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sin);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_cos);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_cos);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_acc);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_acc);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_silu);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu_quick);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu_quick);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_tanh);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_tanh);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_relu);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_relu);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sigmoid);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sigmoid);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardsigmoid);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardsigmoid);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardswish);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardswish);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}


void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_exp);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_exp);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_log);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_log);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_neg);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_neg);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_step);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_step);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_leaky_relu);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_leaky_relu);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqr);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqr);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_upscale);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_upscale);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pad);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pad);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}



void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_add);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sub);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sub);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_mul);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_mul);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_div);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_div);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

@@ -25,52 +25,52 @@ static __dpct_inline__ float op_div(const float a, const float b) {
}


void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

#endif // GGML_SYCL_ELEMENTWISE_HPP

@@ -54,18 +54,12 @@ static ggml_sycl_device_info ggml_sycl_init() {
    GGML_ASSERT(info.device_count <= GGML_SYCL_MAX_DEVICES);

    int64_t total_vram = 0;
#if defined(GGML_SYCL_FORCE_MMQ)
    GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ:   yes\n", __func__);
#else
    GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ:   no\n", __func__);
#endif
#if defined(SYCL_USE_XMX)
    GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
#else
    GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
#endif
    GGML_LOG_INFO("%s: found %d %s devices:\n", __func__, info.device_count, GGML_SYCL_NAME);

/* This is a bit misleading; reserved for later */
// #if defined(SYCL_USE_XMX)
//     GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
// #else
//     GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
// #endif
    for (int i = 0; i < info.device_count; ++i) {
        info.devices[i].vmm = 0;
        dpct::device_info prop;
@@ -109,11 +103,11 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
    name = std::regex_replace(name, std::regex("\\(TM\\)"), "");

    auto global_mem_size = prop.get_global_mem_size()/1000000;

    GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(),
    std::string xmx = gpu_has_xmx(device) ? "yes" : "no";
    GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|%14s|\n", id, device_type.c_str(),
            name.c_str(), version.c_str(), prop.get_max_compute_units(),
            prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
            global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
            global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str(), xmx.c_str());
}

void ggml_backend_sycl_print_sycl_devices() {
@@ -124,16 +118,16 @@ void ggml_backend_sycl_print_sycl_devices() {

    GGML_LOG_INFO(
        "|  |                   |                                       |      "
        " |Max    |        |Max  |Global |                     |\n");
        " |Max    |        |Max  |Global |                     |         XMX  |\n");
    GGML_LOG_INFO(
        "|  |                   |                                       |      "
        " |compute|Max work|sub  |mem    |                     |\n");
        " |compute|Max work|sub  |mem    |                     |          or  |\n");
    GGML_LOG_INFO(
        "|ID|        Device Type|                                   "
        "Name|Version|units  |group   |group|size   |       Driver version|\n");
        "Name|Version|units  |group   |group|size   |       Driver version| Tensor Cores |\n");
    GGML_LOG_INFO(
        "|--|-------------------|---------------------------------------|------"
        "-|-------|--------|-----|-------|---------------------|\n");
        "-|-------|--------|-----|-------|---------------------|--------------|\n");

    for (int id = 0; id < device_count; ++id) {
      sycl::device device = dpct::dev_mgr::instance().get_device(id);
@@ -164,14 +158,18 @@ static void ggml_check_sycl() try {
    static bool initialized = false;

    if (!initialized) {
        GGML_LOG_INFO("[SYCL] call ggml_check_sycl\n");
        GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
        g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
        GGML_LOG_INFO("%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);

#if defined(GGML_SYCL_F16)
        GGML_LOG_INFO("%s: GGML_SYCL_F16: yes\n", __func__);
        GGML_LOG_INFO("GGML_SYCL_DEBUG: %d\n", g_ggml_sycl_debug);
#if defined(GGML_SYCL_FORCE_MMQ)
        GGML_LOG_INFO("GGML_SYCL_FORCE_MMQ:   yes\n");
#else
        GGML_LOG_INFO("%s: GGML_SYCL_F16: no\n", __func__);
        GGML_LOG_INFO("GGML_SYCL_FORCE_MMQ:   no\n");
#endif
#if defined(GGML_SYCL_F16)
        GGML_LOG_INFO("GGML_SYCL_F16: yes\n");
#else
        GGML_LOG_INFO("GGML_SYCL_F16: no\n");
#endif

/* NOT REMOVE, keep it for next optimize for XMX.
@@ -1189,7 +1187,6 @@ std::unique_ptr<ggml_sycl_pool> ggml_backend_sycl_context::new_pool_for_device(q
/// kernels

typedef void (*cpy_kernel_t)(const char * cx, char * cdst);
typedef void (*ggml_sycl_func_t)(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
typedef void (*ggml_sycl_op_mul_mat_t)(
    ggml_backend_sycl_context & ctx,
    const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
@@ -3171,33 +3168,33 @@ catch (sycl::exception const &exc) {
}


static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_repeat);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_repeat);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_get_rows);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_get_rows);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_norm);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_norm);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rms_norm);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_group_norm);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_group_norm);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

@@ -3572,9 +3569,10 @@ __dpct_inline__ static void k_copy_dst_from_contiguous(
    }
}

static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                 const ggml_tensor *src1,
static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
                                 ggml_tensor *dst) try {
    const ggml_tensor *src0 = dst->src[0];
    const ggml_tensor *src1 = dst->src[1];
    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers");

    const ggml_tensor *ids = dst->src[2];
@@ -3740,12 +3738,12 @@ catch (sycl::exception const &exc) {
  std::exit(1);
}

static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_scale);
static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_scale);
}

static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_clamp);
static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_clamp);
}

static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -3787,7 +3785,6 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
                ggml_type_name(src0->type), ggml_type_name(src1->type));
        GGML_ABORT("fatal error");
    }

    GGML_UNUSED(dst);
}
catch (sycl::exception const &exc) {
@@ -3796,59 +3793,52 @@ catch (sycl::exception const &exc) {
  std::exit(1);
}

static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    // TODO: why do we pass dst as src1 here?
    ggml_sycl_cpy(ctx, src0, dst, nullptr);
    GGML_UNUSED(src1);
    ggml_sycl_cpy(ctx, dst->src[0], dst, nullptr);
}

static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_diag_mask_inf);
static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_diag_mask_inf);
}

static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_soft_max);
static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_soft_max);
}

static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    GGML_ASSERT(ggml_is_contiguous(src0)); // TODO: this restriction is temporary until non-cont support is implemented
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rope);
static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(ggml_is_contiguous(dst->src[0])); // TODO: this restriction is temporary until non-cont support is implemented
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rope);
}

static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pool2d);
static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pool2d);
}

static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_im2col);
static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_im2col);
}

static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    GGML_ASSERT(ggml_is_contiguous(src0));
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum);
static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum);
}

static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    GGML_ASSERT(ggml_is_contiguous(src0));
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum_rows);
static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum_rows);
}

static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    GGML_ASSERT(ggml_is_contiguous(src0));
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argsort);
static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argsort);
}

static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    GGML_ASSERT(ggml_is_contiguous(src0));
    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argmax);
static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argmax);
}

static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    GGML_UNUSED(src0);
    GGML_UNUSED(src1);
    GGML_UNUSED(dst);
    GGML_UNUSED(ctx);
}

void ggml_sycl_set_main_device(const int main_device) try {
    if (dpct::get_current_device_id() == static_cast<unsigned int> (main_device)) {
@@ -3871,191 +3861,189 @@ catch (sycl::exception const &exc) {
  std::exit(1);
}

bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * tensor) {
bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * dst) {
    if (!g_sycl_loaded) return false;

    ggml_sycl_func_t func;
    if (dst->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(dst->src[0]->buffer)) {
        ggml_sycl_set_peer_access(dst->src[1]->ne[1], ctx.device);
    }

    switch (tensor->op) {
    switch (dst->op) {
        case GGML_OP_ARGMAX:
            func = ggml_sycl_argmax;
            ggml_sycl_argmax(ctx, dst);
            break;
        case GGML_OP_CONV_TRANSPOSE_1D:
            func = ggml_sycl_op_conv_transpose_1d;
            ggml_sycl_op_conv_transpose_1d(ctx, dst);
            break;
        case GGML_OP_REPEAT:
            func = ggml_sycl_repeat;
            ggml_sycl_repeat(ctx, dst);
            break;
        case GGML_OP_GET_ROWS:
            func = ggml_sycl_get_rows;
            ggml_sycl_get_rows(ctx, dst);
            break;
        case GGML_OP_DUP:
            func = ggml_sycl_dup;
            ggml_sycl_dup(ctx, dst);
            break;
        case GGML_OP_ADD:
        case GGML_OP_ADD1: // TODO: more efficient implementation
            func = ggml_sycl_add;
            ggml_sycl_add(ctx, dst);
            break;
        case GGML_OP_SUB:
            func = ggml_sycl_sub;
            ggml_sycl_sub(ctx, dst);
            break;
        case GGML_OP_ACC:
            func = ggml_sycl_acc;
            ggml_sycl_acc(ctx, dst);
            break;
        case GGML_OP_MUL:
            func = ggml_sycl_mul;
            ggml_sycl_mul(ctx, dst);
            break;
        case GGML_OP_LOG:
            func = ggml_sycl_log;
            ggml_sycl_log(ctx, dst);
            break;
        case GGML_OP_DIV:
            func = ggml_sycl_div;
            ggml_sycl_div(ctx, dst);
            break;
        case GGML_OP_UNARY:
            switch (ggml_get_unary_op(tensor)) {
            switch (ggml_get_unary_op(dst)) {
                case GGML_UNARY_OP_NEG:
                    func = ggml_sycl_neg;
                    ggml_sycl_neg(ctx, dst);
                    break;
                case GGML_UNARY_OP_STEP:
                    func = ggml_sycl_step;
                    ggml_sycl_step(ctx, dst);
                    break;
                case GGML_UNARY_OP_GELU:
                    func = ggml_sycl_gelu;
                    ggml_sycl_gelu(ctx, dst);
                    break;
                case GGML_UNARY_OP_SILU:
                    func = ggml_sycl_silu;
                    ggml_sycl_silu(ctx, dst);
                    break;
                case GGML_UNARY_OP_GELU_QUICK:
                    func = ggml_sycl_gelu_quick;
                    ggml_sycl_gelu_quick(ctx, dst);
                    break;
                case GGML_UNARY_OP_TANH:
                    func = ggml_sycl_tanh;
                    ggml_sycl_tanh(ctx, dst);
                    break;
                case GGML_UNARY_OP_RELU:
                    func = ggml_sycl_relu;
                    ggml_sycl_relu(ctx, dst);
                    break;
                case GGML_UNARY_OP_SIGMOID:
                    func = ggml_sycl_sigmoid;
                    ggml_sycl_sigmoid(ctx, dst);
                    break;
                case GGML_UNARY_OP_HARDSIGMOID:
                    func = ggml_sycl_hardsigmoid;
                    ggml_sycl_hardsigmoid(ctx, dst);
                    break;
                case GGML_UNARY_OP_HARDSWISH:
                    func = ggml_sycl_hardswish;
                    ggml_sycl_hardswish(ctx, dst);
                    break;
                case GGML_UNARY_OP_EXP:
                    func = ggml_sycl_exp;
                    ggml_sycl_exp(ctx, dst);
                    break;
                default:
                    return false;
            }
            break;
        case GGML_OP_NORM:
            func = ggml_sycl_norm;
            ggml_sycl_norm(ctx, dst);
            break;
        case GGML_OP_GROUP_NORM:
            func = ggml_sycl_group_norm;
            ggml_sycl_group_norm(ctx, dst);
            break;
        case GGML_OP_CONCAT:
            func = ggml_sycl_op_concat;
            ggml_sycl_op_concat(ctx, dst);
            break;
        case GGML_OP_UPSCALE:
            func = ggml_sycl_upscale;
            ggml_sycl_upscale(ctx, dst);
            break;
        case GGML_OP_PAD:
            func = ggml_sycl_pad;
            ggml_sycl_pad(ctx, dst);
            break;
        case GGML_OP_LEAKY_RELU:
            func = ggml_sycl_leaky_relu;
            ggml_sycl_leaky_relu(ctx, dst);
            break;
        case GGML_OP_RMS_NORM:
            func = ggml_sycl_rms_norm;
            ggml_sycl_rms_norm(ctx, dst);
            break;
        case GGML_OP_MUL_MAT:
            if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
            if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
                return false;
            }
            func = ggml_sycl_mul_mat;
            /* ggml_sycl_mul_mat_id is dependent on ggml_sycl_mul_mat */
            ggml_sycl_mul_mat(ctx, dst->src[0], dst->src[1], dst);
            break;
        case GGML_OP_MUL_MAT_ID:
            if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
            if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
                return false;
            }
            func = ggml_sycl_mul_mat_id;
            ggml_sycl_mul_mat_id(ctx, dst);
            break;
        case GGML_OP_OUT_PROD:
            func = ggml_sycl_op_out_prod;
            ggml_sycl_op_out_prod(ctx, dst);
            break;
        case GGML_OP_SCALE:
            func = ggml_sycl_scale;
            ggml_sycl_scale(ctx, dst);
            break;
        case GGML_OP_SQR:
            func = ggml_sycl_sqr;
            ggml_sycl_sqr(ctx, dst);
            break;
        case GGML_OP_SQRT:
            func = ggml_sycl_sqrt;
            ggml_sycl_sqrt(ctx, dst);
            break;
        case GGML_OP_SIN:
            func = ggml_sycl_sin;
            ggml_sycl_sin(ctx, dst);
            break;
        case GGML_OP_COS:
            func = ggml_sycl_cos;
            ggml_sycl_cos(ctx, dst);
            break;
        case GGML_OP_CLAMP:
            func = ggml_sycl_clamp;
            ggml_sycl_clamp(ctx, dst);
            break;
        case GGML_OP_CPY:
            func = ggml_sycl_cpy;
            ggml_sycl_cpy(ctx, dst->src[0], dst->src[1], dst);
            break;
        case GGML_OP_CONT:
            func = ggml_sycl_dup;
            ggml_sycl_dup(ctx, dst);
            break;
        case GGML_OP_NONE:
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
        case GGML_OP_TRANSPOSE:
            func = ggml_sycl_nop;
            GGML_SYCL_DEBUG("%s: Tensor NO-OP\n", __func__);
            break;
        case GGML_OP_DIAG_MASK_INF:
            func = ggml_sycl_diag_mask_inf;
            ggml_sycl_diag_mask_inf(ctx, dst);
            break;
        case GGML_OP_SOFT_MAX:
            func = ggml_sycl_soft_max;
            ggml_sycl_soft_max(ctx, dst);
            break;
        case GGML_OP_ROPE:
            func = ggml_sycl_rope;
            ggml_sycl_rope(ctx, dst);
            break;
        case GGML_OP_IM2COL:
            func = ggml_sycl_im2col;
            ggml_sycl_im2col(ctx, dst);
            break;
        case GGML_OP_POOL_2D:
            func = ggml_sycl_pool2d;
            ggml_sycl_pool2d(ctx, dst);
            break;
        case GGML_OP_SUM:
            func = ggml_sycl_sum;
            ggml_sycl_sum(ctx, dst);
            break;
        case GGML_OP_SUM_ROWS:
            func = ggml_sycl_sum_rows;
            ggml_sycl_sum_rows(ctx, dst);
            break;
        case GGML_OP_ARGSORT:
            func = ggml_sycl_argsort;
            ggml_sycl_argsort(ctx, dst);
            break;
        case GGML_OP_TIMESTEP_EMBEDDING:
            func = ggml_sycl_op_timestep_embedding;
            ggml_sycl_op_timestep_embedding(ctx, dst);
            break;
        case GGML_OP_RWKV_WKV6:
            func = ggml_sycl_op_rwkv_wkv6;
            ggml_sycl_op_rwkv_wkv6(ctx, dst);
            break;
        default:
            return false;
    }

    if (tensor->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(tensor->src[0]->buffer)) {
        ggml_sycl_set_peer_access(tensor->src[1]->ne[1], ctx.device);
    }

    func(ctx, tensor->src[0], tensor->src[1], tensor);
    return true;
}

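With this hunk the dispatcher no longer stores a ggml_sycl_func_t pointer taking (ctx, src0, src1, dst) and calls it once after the switch; each case invokes its handler directly with only dst, and the split-buffer peer-access check moves ahead of the switch. A toy C++ sketch of the resulting dispatch shape (hypothetical node/op_kind types, not the real ggml enums or backend context):

// Toy dispatcher in the new style: each case calls its handler directly,
// and handlers take only the destination node (operands come from dst->src[]).
#include <cstdio>

enum op_kind { OP_ADD, OP_RELU };

struct node {
    op_kind      op     = OP_ADD;
    const node * src[2] = { nullptr, nullptr };
    float        value  = 0.0f;
};

static void op_add(node * dst)  { dst->value = dst->src[0]->value + dst->src[1]->value; }
static void op_relu(node * dst) { dst->value = dst->src[0]->value > 0.0f ? dst->src[0]->value : 0.0f; }

static bool compute_forward(node * dst) {
    switch (dst->op) {
        case OP_ADD:  op_add(dst);  break;   // direct call, no function pointer
        case OP_RELU: op_relu(dst); break;
        default:      return false;          // unsupported op
    }
    return true;
}

int main() {
    node a; a.value = -1.5f;
    node r; r.op = OP_RELU; r.src[0] = &a;
    if (compute_forward(&r)) {
        std::printf("relu(-1.5) = %g\n", r.value);   // prints 0
    }
}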
@@ -3,9 +3,9 @@
#include "outprod.hpp"


void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
    const ggml_tensor* src1, ggml_tensor* dst) {

void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
    const ggml_tensor *src0 = dst->src[0];
    const ggml_tensor *src1 = dst->src[1];

    GGML_ASSERT(src0->type == GGML_TYPE_F32);
    GGML_ASSERT(src1->type == GGML_TYPE_F32);

@@ -3,8 +3,7 @@

#include "common.hpp"

void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
    const ggml_tensor* src1, ggml_tensor* dst);
void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst);


#endif // GGML_SYCL_OUTPROD_HPP

@@ -55,8 +55,9 @@ static void timestep_embedding_f32_sycl(
        });
}

void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
    const ggml_tensor *src1, ggml_tensor * dst) {
void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    const ggml_tensor *src0 = dst->src[0];
    const ggml_tensor *src1 = dst->src[1];
    const float * src0_d = (const float *)src0->data;
    float * dst_d = (float *)dst->data;
    dpct::queue_ptr stream = ctx.stream();

@@ -15,7 +15,6 @@

#include "common.hpp"

void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
    const ggml_tensor *src1, ggml_tensor * dst);
void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

#endif // GGML_SYCL_TSEMBD_HPP

@@ -95,8 +95,10 @@ static void rwkv_wkv_f32_kernel(
    }
}

void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
    const ggml_tensor* src1, ggml_tensor* dst) {
void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {

    const ggml_tensor *src0 = dst->src[0];
    const ggml_tensor *src1 = dst->src[1];

    const float* k_d = (const float*)dst->src[0]->data;
    const float* v_d = (const float*)dst->src[1]->data;

@@ -3,8 +3,7 @@

#include "common.hpp"

void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
    const ggml_tensor *src1, ggml_tensor * dst);
void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, ggml_tensor * dst);


#endif // GGML_SYCL_WKV6_HPP
