mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-16 11:27:03 +00:00
* musa: enable fp16 mma (all) and cublas on qy2 Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> * Update ggml/src/ggml-cuda/ggml-cuda.cu Co-authored-by: Johannes Gäßler <johannesg@5d6.de> * Address review comments Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> * Address review comments Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> * musa: disable MUL_MAT_ID (q2_k × f32) due to precision issues Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
13 lines
333 B
Plaintext
13 lines
333 B
Plaintext
#pragma once
|
|
|
|
#include "ggml-cuda/common.cuh"
|
|
#include "ggml.h"
|
|
|
|
// Copies the contents of tensor `src` into tensor `dst` asynchronously, using
// the supplied backend context `ctx`.
//
//   ctx - backend context used to carry out the copy
//   dst - tensor that receives the data
//   src - tensor that provides the data
//
// The returned musaError_t reports whether the operation succeeded or failed.
//
// NOTE(review): `dst` is the destination of the copy yet is taken as a pointer
// to const — confirm this is intentional before relying on it.
musaError_t mudnnMemcpyAsync(ggml_backend_cuda_context & ctx, const ggml_tensor * dst, const ggml_tensor * src);
|