sycl : add ARANGE operator (#16362)

* SYCL: update element-wise ops and presets

* clean arange

* Re-trigger CI

---------

Co-authored-by: Gitty Burstein <gitty@example.com>
This commit is contained in:
GittyBurstein
2025-10-16 16:26:21 +03:00
committed by GitHub
parent 7a50cf388a
commit b22572e97d
4 changed files with 40 additions and 0 deletions

View File

@@ -397,6 +397,14 @@ static void acc_f32_sycl(const float *x, const float *y, float *dst,
});
}
template<typename T>
static void arange_kernel(T * dst, const int k, T start, T step,
const sycl::nd_item<1> &item_ct1) {
SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
dst[i] = start + static_cast<T>(i) * step;
}
}
template<typename T>
static void upscale_sycl(const T *x, T *dst, const int nb00, const int nb01,
const int nb02, const int nb03, const int ne10, const int ne11,
@@ -565,6 +573,25 @@ static inline void dispatch_ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx
}
static inline void ggml_sycl_op_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_ASSERT(dst->type == GGML_TYPE_F32);
float start, stop, step;
memcpy(&start, dst->op_params, sizeof(float));
memcpy(&stop, (float *) dst->op_params + 1, sizeof(float));
memcpy(&step, (float *) dst->op_params + 2, sizeof(float));
dpct::queue_ptr stream = ctx.stream();
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
float * dst_ptr = (float *)dst->data;
const int k = (int)ggml_nelements(dst);
const int num_blocks = ceil_div(k, SYCL_ARANGE_BLOCK_SIZE);
stream->parallel_for(
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_ARANGE_BLOCK_SIZE),
sycl::range<1>(SYCL_ARANGE_BLOCK_SIZE)),
[=](sycl::nd_item<1> item_ct1) {
arange_kernel(dst_ptr, k, start, step, item_ct1);
});
}
} // namespace ggml_sycl_detail
@@ -1090,3 +1117,8 @@ void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_geglu_quick(ctx, dst);
}
void ggml_sycl_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/0);
ggml_sycl_detail::ggml_sycl_op_arange(ctx, dst);
}

View File

@@ -81,4 +81,6 @@ void ggml_sycl_swiglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
void ggml_sycl_geglu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
void ggml_sycl_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
#endif // GGML_SYCL_ELEMENTWISE_HPP

View File

@@ -3832,6 +3832,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
case GGML_OP_GATED_LINEAR_ATTN:
ggml_sycl_op_gated_linear_attn(ctx, dst);
break;
case GGML_OP_ARANGE:
ggml_sycl_arange(ctx, dst);
break;
default:
return false;
}
@@ -4478,6 +4481,8 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_RWKV_WKV7:
case GGML_OP_GATED_LINEAR_ATTN:
return true;
case GGML_OP_ARANGE:
return op->type == GGML_TYPE_F32;
default:
return false;
}

View File

@@ -49,6 +49,7 @@
#define SYCL_ARGMAX_BLOCK_SIZE 256
#define SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE 256
#define SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE 256
#define SYCL_ARANGE_BLOCK_SIZE 256
// dmmv = dequantize_mul_mat_vec
#ifndef GGML_SYCL_DMMV_X