CUDA: non-contiguous (RMS) norm support (#11659)

* CUDA: non-contiguous (RMS) norm support

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Johannes Gäßler
2025-02-04 22:21:42 +01:00
committed by GitHub
parent 3ec9fd4b77
commit fd08255d0d
6 changed files with 97 additions and 47 deletions

View File

@@ -1206,10 +1206,11 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
case GGML_OP_GROUP_NORM:
return has_simdgroup_reduction;
case GGML_OP_RMS_NORM:
return has_simdgroup_reduction && (op->ne[0] % 4 == 0);
return has_simdgroup_reduction && (op->ne[0] % 4 == 0 && ggml_is_contiguous_1(op->src[0]));
case GGML_OP_ARGMAX:
case GGML_OP_NORM:
return true;
case GGML_OP_NORM:
return has_simdgroup_reduction && ggml_is_contiguous(op->src[0]);
case GGML_OP_ROPE:
{
const int mode = ((const int32_t *) op->op_params)[2];