CUDA: replace GGML_CUDA_F16 with CUDA arch checks (#15433)

This commit is contained in:
Johannes Gäßler
2025-08-20 16:58:49 +02:00
committed by GitHub
parent fec9519802
commit 7a6e91ad26
12 changed files with 32 additions and 86 deletions

View File

@@ -42,7 +42,7 @@ static __device__ void cpy_blck_q8_0_f32(const char * cxi, char * cdsti) {
#pragma unroll
for (int j = 0; j < QK8_0; j += 2) {
-dfloat2 dq;
+float2 dq;
dequantize_q8_0(cxi, 0, j, dq);
*(cdstf + j) = dq.x;
*(cdstf + j + 1) = dq.y;
@@ -55,7 +55,7 @@ static __device__ void cpy_blck_q_f32(const char * cxi, char * cdsti) {
#pragma unroll
for (int j = 0; j < qk/2; j++) {
-dfloat2 dq;
+float2 dq;
dequant(cxi, 0, j, dq);
*(cdstf + j) = dq.x;
*(cdstf + j + qk/2) = dq.y;