mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-21 12:16:57 +00:00
CUDA: replace GGML_CUDA_F16 with CUDA arch checks (#15433)
This commit is contained in:
@@ -42,7 +42,7 @@ static __device__ void cpy_blck_q8_0_f32(const char * cxi, char * cdsti) {
|
||||
|
||||
#pragma unroll
|
||||
for (int j = 0; j < QK8_0; j += 2) {
|
||||
-dfloat2 dq;
|
||||
+float2 dq;
|
||||
dequantize_q8_0(cxi, 0, j, dq);
|
||||
*(cdstf + j) = dq.x;
|
||||
*(cdstf + j + 1) = dq.y;
|
||||
@@ -55,7 +55,7 @@ static __device__ void cpy_blck_q_f32(const char * cxi, char * cdsti) {
|
||||
|
||||
#pragma unroll
|
||||
for (int j = 0; j < qk/2; j++) {
|
||||
-dfloat2 dq;
|
||||
+float2 dq;
|
||||
dequant(cxi, 0, j, dq);
|
||||
*(cdstf + j) = dq.x;
|
||||
*(cdstf + j + qk/2) = dq.y;
|
||||
|
||||
Reference in New Issue
Block a user