Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00).
			
		
		
		
	CUDA: add set rows for f32 and f16 (#14551)
* CUDA: add set rows for f32 and f16
* Review: change kernel params, use strides from host
* Use 1-d kernel
* Review: use int64_t for blockDim.x, rename nb->s for clarity
This commit is contained in:
@@ -43,6 +43,7 @@
 #include "ggml-cuda/upscale.cuh"
 #include "ggml-cuda/wkv.cuh"
 #include "ggml-cuda/gla.cuh"
+#include "ggml-cuda/set-rows.cuh"
 #include "ggml.h"

 #include <algorithm>
@@ -2230,6 +2231,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
         case GGML_OP_GET_ROWS_BACK:
             ggml_cuda_op_get_rows_back(ctx, dst);
             break;
+        case GGML_OP_SET_ROWS:
+            ggml_cuda_op_set_rows(ctx, dst);
+            break;
         case GGML_OP_DUP:
             ggml_cuda_dup(ctx, dst);
             break;
@@ -3216,6 +3220,12 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
             {
                 return op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32 && op->ne[2] == 1 && op->ne[3] == 1;
             } break;
+        case GGML_OP_SET_ROWS:
+            {
+                return (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16) &&
+                       op->src[0]->type == GGML_TYPE_F32 &&
+                       op->src[1]->type == GGML_TYPE_I64;
+            } break;
         case GGML_OP_CPY:
             {
                 ggml_type src0_type = op->src[0]->type;
Reference in New Issue
Block a user
Author: Aman Gupta