mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-08 10:07:01 +00:00
sycl: add usage of enqueue_functions extension (#14244)
* Add header and namespace to use enqueue_functions extension * Convert submit and parallel_for to use new extension in convert.cpp * Convert submit and parallel_for to use extension in ggml-sycl.cpp * Convert submit and parallel_for to use extension in gla.cpp * Convert submit and parallel_for in mmq.cpp * Convert submit and parallel_for in mmvq.cpp * Convert submit and parallel_for in remaining files * Convert all simple parallel_for to nd_launch from enqueue_functions extension * Wrapping extension in general function Create a general function that enable the enqueue_functions extension if it is enable in the compiler, otherwise call the general SYCL function to launch kernels. --------- Signed-off-by: nscipione <nicolo.scipione@codeplay.com>
This commit is contained in:
@@ -11,13 +11,13 @@ static void gated_linear_attn_f32_kernel(const dpct::queue_ptr stream, u_int B,
|
||||
const u_int n_seq_tokens = T / B;
|
||||
sycl::range<1> block_dims((C / H));
|
||||
sycl::range<1> grid_dims((B * H));
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
/* local memory accessors*/
|
||||
auto _k = sycl::local_accessor<float, 1>(sycl::range<1>(head_size), cgh);
|
||||
auto _r = sycl::local_accessor<float, 1>(sycl::range<1>(head_size), cgh);
|
||||
auto _td = sycl::local_accessor<float, 1>(sycl::range<1>(head_size), cgh);
|
||||
|
||||
cgh.parallel_for(sycl::nd_range<1>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<1> item) {
|
||||
sycl_parallel_for<1>(cgh, sycl::nd_range<1>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<1> item) {
|
||||
u_int tid = item.get_local_id(0);
|
||||
u_int bid = item.get_group(0);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user