CUDA: use CUB for arbitrary size argsort (#16754)

This commit is contained in:
Aman Gupta
2025-10-24 20:46:19 +08:00
committed by GitHub
parent 69e9ff0103
commit 0bcb40b48c
2 changed files with 104 additions and 5 deletions

View File

@@ -3642,8 +3642,11 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_SUM:
return ggml_is_contiguous_rows(op->src[0]);
case GGML_OP_ARGSORT:
// TODO: Support arbitrary column width without CUB (the non-CUB path is limited to ne[0] <= 1024)
#ifndef GGML_CUDA_USE_CUB
return op->src[0]->ne[0] <= 1024;
#else
return true;
#endif
case GGML_OP_SUM_ROWS:
case GGML_OP_MEAN:
case GGML_OP_GROUP_NORM: