mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-17 11:37:10 +00:00
* Revert "sycl: add usage of enqueue_functions extension (#14244)"
This reverts commit 8308f98c7f.
* fix missed revert code, format the code
This commit is contained in:
@@ -33,11 +33,14 @@ static void dequantize_block_sycl(const void *__restrict__ vx,
|
||||
{
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
sycl_parallel_for(
|
||||
stream,
|
||||
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_DEQUANTIZE_BLOCK_SIZE),
|
||||
sycl::range<3>(1, 1, SYCL_DEQUANTIZE_BLOCK_SIZE)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block<qk, qr, dequantize_kernel>(vx, y, k, item_ct1); });
|
||||
stream->parallel_for(
|
||||
sycl::nd_range<3>(
|
||||
sycl::range<3>(1, 1, num_blocks) *
|
||||
sycl::range<3>(1, 1, SYCL_DEQUANTIZE_BLOCK_SIZE),
|
||||
sycl::range<3>(1, 1, SYCL_DEQUANTIZE_BLOCK_SIZE)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block<qk, qr, dequantize_kernel>(vx, y, k, item_ct1);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,18 +53,24 @@ static void dequantize_row_q2_K_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_parallel_for(
|
||||
stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q2_K(vx, y, item_ct1); });
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 64),
|
||||
sycl::range<3>(1, 1, 64)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q2_K(vx, y, item_ct1);
|
||||
});
|
||||
}
|
||||
#else
|
||||
{
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_parallel_for(
|
||||
stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q2_K(vx, y, item_ct1); });
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q2_K(vx, y, item_ct1);
|
||||
});
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -76,18 +85,24 @@ static void dequantize_row_q3_K_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_parallel_for(
|
||||
stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q3_K(vx, y, item_ct1); });
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 64),
|
||||
sycl::range<3>(1, 1, 64)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q3_K(vx, y, item_ct1);
|
||||
});
|
||||
}
|
||||
#else
|
||||
{
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_parallel_for(
|
||||
stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q3_K(vx, y, item_ct1); });
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q3_K(vx, y, item_ct1);
|
||||
});
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -101,9 +116,12 @@ static void dequantize_row_q4_0_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_parallel_for(
|
||||
stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q4_0(vx, y, nb32, item_ct1); });
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q4_0(vx, y, nb32, item_ct1);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,12 +135,13 @@ static void dequantize_row_q4_0_sycl_reorder(const void *vx, dst_t *y, const int
|
||||
int constexpr WARP_K = WARP_SIZE * QK4_0;
|
||||
const int n_warp = (k + WARP_K - 1) / WARP_K;
|
||||
GGML_ASSERT(k % 2 == 0);
|
||||
sycl_parallel_for(stream,
|
||||
sycl::nd_range<3>(sycl::range<3>(1, 1, n_warp) * sycl::range<3>(1, 1, WARP_SIZE),
|
||||
sycl::range<3>(1, 1, WARP_SIZE)),
|
||||
[=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
dequantize_block_q4_0_reorder(vx, y, k, item_ct1);
|
||||
});
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, n_warp) *
|
||||
sycl::range<3>(1, 1, WARP_SIZE),
|
||||
sycl::range<3>(1, 1, WARP_SIZE)),
|
||||
[=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]]{
|
||||
dequantize_block_q4_0_reorder(vx, y, k, item_ct1);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
template <typename dst_t>
|
||||
@@ -134,9 +153,12 @@ static void dequantize_row_q4_1_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_parallel_for(
|
||||
stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q4_1(vx, y, nb32, item_ct1); });
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q4_1(vx, y, nb32, item_ct1);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,13 +171,14 @@ static void dequantize_row_q4_K_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
sycl::local_accessor<uint8_t, 1> scale_local_acc(sycl::range<1>(12), cgh);
|
||||
sycl_parallel_for(
|
||||
cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q4_K(vx, y, get_pointer(scale_local_acc), item_ct1);
|
||||
});
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q4_K(vx, y, get_pointer(scale_local_acc), item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -168,13 +191,13 @@ static void dequantize_row_q4_K_sycl_reorder(const void * vx, dst_t * y, const i
|
||||
|
||||
dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 });
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
sycl::local_accessor<uint8_t, 1> scale_local_acc(sycl::range<1>(12), cgh);
|
||||
|
||||
sycl_parallel_for<1>(cgh, sycl::nd_range<1>(sycl::range<1>(global_size), sycl::range<1>(local_size)),
|
||||
[=](sycl::nd_item<1> item_ct1) {
|
||||
dequantize_block_q4_K_reorder(vx, y, get_pointer(scale_local_acc), item_ct1, nb);
|
||||
});
|
||||
cgh.parallel_for(sycl::nd_range<1>(sycl::range<1>(global_size), sycl::range<1>(local_size)),
|
||||
[=](sycl::nd_item<1> item_ct1) {
|
||||
dequantize_block_q4_K_reorder(vx, y, get_pointer(scale_local_acc), item_ct1, nb);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -187,18 +210,24 @@ static void dequantize_row_q5_K_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_parallel_for(
|
||||
stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q5_K(vx, y, item_ct1); });
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 64),
|
||||
sycl::range<3>(1, 1, 64)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q5_K(vx, y, item_ct1);
|
||||
});
|
||||
}
|
||||
#else
|
||||
{
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_parallel_for(
|
||||
stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q5_K(vx, y, item_ct1); });
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q5_K(vx, y, item_ct1);
|
||||
});
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -213,18 +242,24 @@ static void dequantize_row_q6_K_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_parallel_for(
|
||||
stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q6_K(vx, y, item_ct1); });
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 64),
|
||||
sycl::range<3>(1, 1, 64)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q6_K(vx, y, item_ct1);
|
||||
});
|
||||
}
|
||||
#else
|
||||
{
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_parallel_for(
|
||||
stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q6_K(vx, y, item_ct1); });
|
||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_q6_K(vx, y, item_ct1);
|
||||
});
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -236,9 +271,9 @@ static void dequantize_row_q6_K_sycl_reorder(const void * vx, dst_t * y, const i
|
||||
|
||||
dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 });
|
||||
|
||||
sycl_parallel_for(stream,
|
||||
sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q6_K_reorder(vx, y, item_ct1, nb); });
|
||||
stream->parallel_for(
|
||||
sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q6_K_reorder(vx, y, item_ct1, nb); });
|
||||
}
|
||||
|
||||
template <typename dst_t>
|
||||
@@ -249,10 +284,15 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
sycl_parallel_for(
|
||||
cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_iq1_s(vx, y, item_ct1, iq1s_grid_gpu); });
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq1_s(
|
||||
vx, y, item_ct1, iq1s_grid_gpu
|
||||
);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -265,10 +305,15 @@ static void dequantize_row_iq1_m_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
sycl_parallel_for(
|
||||
cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_iq1_m(vx, y, item_ct1, iq1s_grid_gpu); });
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq1_m(
|
||||
vx, y, item_ct1, iq1s_grid_gpu
|
||||
);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -281,12 +326,15 @@ static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int64_t
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
sycl_parallel_for(
|
||||
cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq2_xxs(vx, y, item_ct1, iq2xxs_grid, ksigns_iq2xs, kmask_iq2xs);
|
||||
});
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq2_xxs(
|
||||
vx, y, item_ct1, iq2xxs_grid,
|
||||
ksigns_iq2xs, kmask_iq2xs);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -299,12 +347,15 @@ static void dequantize_row_iq2_xs_sycl(const void *vx, dst_t *y, const int64_t k
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
sycl_parallel_for(
|
||||
cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq2_xs(vx, y, item_ct1, iq2xs_grid, ksigns_iq2xs, kmask_iq2xs);
|
||||
});
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq2_xs(
|
||||
vx, y, item_ct1, iq2xs_grid,
|
||||
ksigns_iq2xs, kmask_iq2xs);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -317,10 +368,13 @@ static void dequantize_row_iq2_s_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
sycl_parallel_for(
|
||||
cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_iq2_s(vx, y, item_ct1); });
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq2_s(vx, y, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -334,12 +388,15 @@ static void dequantize_row_iq3_xxs_sycl(const void *vx, dst_t *y, const int64_t
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
sycl_parallel_for(
|
||||
cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq3_xxs(vx, y, item_ct1, iq3xxs_grid, ksigns_iq2xs, kmask_iq2xs);
|
||||
});
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq3_xxs(
|
||||
vx, y, item_ct1, iq3xxs_grid,
|
||||
ksigns_iq2xs, kmask_iq2xs);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -352,10 +409,14 @@ static void dequantize_row_iq3_s_sycl(const void *vx, dst_t *y, const int64_t k,
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
sycl_parallel_for(
|
||||
cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_iq3_s(vx, y, item_ct1, kmask_iq2xs, iq3s_grid); });
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq3_s(
|
||||
vx, y, item_ct1, kmask_iq2xs, iq3s_grid);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -371,11 +432,14 @@ static void dequantize_row_iq4_xs_sycl(const void *vx, dst_t *y, const int64_t k
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
sycl_parallel_for(
|
||||
cgh,
|
||||
sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_iq4_xs(vx, y, item_ct1); });
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq4_xs(vx, y, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
#endif
|
||||
@@ -389,11 +453,14 @@ static void dequantize_row_iq4_nl_sycl(const void *vx, dst_t *y, const int64_t k
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
sycl_launch(stream, [&](sycl::handler & cgh) {
|
||||
sycl_parallel_for(
|
||||
cgh,
|
||||
sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) { dequantize_block_iq4_nl(vx, y, item_ct1); });
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
sycl::range<3>(1, 1, 32)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
dequantize_block_iq4_nl(vx, y, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user