mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-22 12:27:26 +00:00
CUDA: fix should_use_mmvf for ne11 == 1 (#17085)
* CUDA: fix should_use_mmvf for ne11 == 1 * Apply suggestion from @am17an Co-authored-by: Aman Gupta <amangupta052@gmail.com> --------- Co-authored-by: Aman Gupta <amangupta052@gmail.com>
This commit is contained in:
@@ -129,7 +129,13 @@ bool ggml_cuda_should_use_mmf(enum ggml_type type, int cc, int warp_size, const
|
||||
if (src0_ne[0] % (warp_size * (4/ts)) != 0) {
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < GGML_MAX_DIMS; ++i) {
|
||||
|
||||
if (src0_nb[0] != ts) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Pointers not aligned to the size of half2/nv_bfloat162/float2 would result in a crash:
|
||||
for (size_t i = 1; i < GGML_MAX_DIMS; ++i) {
|
||||
if (src0_nb[i] % (2*ts) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user