mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-31 08:51:55 +00:00
			
		
		
		
	CUDA: set_rows + cpy.cu refactor (#14712)
This commit is contained in:
		| @@ -3226,8 +3226,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g | ||||
|             } break; | ||||
|         case GGML_OP_SET_ROWS: | ||||
|             { | ||||
|-#pragma message("TODO: implement Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)") | ||||
|-                return (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16 || op->type == GGML_TYPE_BF16) && | ||||
|+                return (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16 || op->type == GGML_TYPE_BF16 || | ||||
|+                       op->type == GGML_TYPE_Q4_0 || op->type == GGML_TYPE_Q4_1 || op->type == GGML_TYPE_Q5_0 || | ||||
|+                       op->type == GGML_TYPE_Q5_1 || op->type == GGML_TYPE_Q8_0 || op->type == GGML_TYPE_IQ4_NL) && | ||||
|                        op->src[0]->type == GGML_TYPE_F32 && | ||||
|                        op->src[1]->type == GGML_TYPE_I64; | ||||
|             } break; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Aman Gupta
					Aman Gupta