mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-13 10:57:15 +00:00
[SYCL] Optimize mul_mat for Q4_0 on Intel GPU (#12035)
* optimize performance by reordering for Intel GPU * detect the hardware type, save the optimization feature, and print the optimization feature * correct name * support optimizing the graph once when computing the graph, record the optimization status in tensor->extra, make CI pass * add environment variable GGML_SYCL_DISABLE_OPT for debugging * use syclex::architecture to replace the custom hardware define, update the guide for GGML_SYCL_DISABLE_OPT * add performance data * move getrows functions to separate files * fix global variables --------- Co-authored-by: arthw <14088817+arthw@users.noreply.github.com>
This commit is contained in:
@@ -99,3 +99,20 @@ catch (sycl::exception const &exc) {
|
||||
<< ", line:" << __LINE__ << std::endl;
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
|
||||
// Releases everything referenced by `extra`, then deletes `extra` itself.
//
// For each device index i in [0, ggml_sycl_info().device_count):
//   * every non-null entry of extra->events[i][0 .. GGML_SYCL_MAX_STREAMS) is
//     passed to dpct::destroy_event;
//   * a non-null extra->data_device[i] is released with sycl::free, using the
//     queue that streams[i] points to.
//
// Parameters:
//   extra   - object to tear down; a null pointer is accepted and is a no-op.
//   streams - one queue pointer per device index. Callers may pass an empty
//             vector; device buffers are then left untouched. A device index
//             that has no corresponding entry in `streams` is likewise skipped
//             instead of being read out of bounds.
//
// NOTE(review): streams[i] is dereferenced without a null check, as in the
// original code — confirm that callers never store null queue pointers.
void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams) {
    if (extra == nullptr) {
        return;
    }

    using stream_index = std::vector<queue_ptr>::size_type;

    for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
        for (int64_t is = 0; is < GGML_SYCL_MAX_STREAMS; ++is) {
            if (extra->events[i][is] != nullptr) {
                SYCL_CHECK(CHECK_TRY_ERROR(dpct::destroy_event(extra->events[i][is])));
            }
        }

        // Only index `streams` when it really has an entry for this device:
        // checking merely for a non-empty vector would still allow streams[i]
        // to run past the end when fewer queues than devices were supplied.
        const bool has_stream = static_cast<stream_index>(i) < streams.size();
        if (extra->data_device[i] != nullptr && has_stream) {
            ggml_sycl_set_device(i);
            SYCL_CHECK(
                CHECK_TRY_ERROR(sycl::free(extra->data_device[i], *(streams[i]))));
        }
    }

    delete extra;
}
|
||||
|
||||
Reference in New Issue
Block a user