mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-18 11:46:58 +00:00
[SYCL] refactor (#6408)
* separate lower-precision GEMM from the main files * fix hard-coded workgroup size
This commit is contained in:
@@ -18,8 +18,6 @@
|
||||
#define GGML_SYCL_MAX_DEVICES 48
|
||||
#define GGML_SYCL_NAME "SYCL"
|
||||
|
||||
// FIXME: 1024 from cuda
|
||||
#define GROUP_SIZE 1024
|
||||
#define WARP_SIZE 32
|
||||
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
|
||||
|
||||
|
||||
Reference in New Issue
Block a user