mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-08 10:07:01 +00:00
CUDA: MMQ support for iq4_nl, iq4_xs (#8278)
This commit is contained in:
@@ -1,36 +1,8 @@
|
||||
#include "common.cuh"
|
||||
#include <cstdint>
|
||||
|
||||
static __device__ __forceinline__ int get_int_from_int8(const int8_t * x8, const int & i32) {
|
||||
const uint16_t * x16 = (const uint16_t *) (x8 + sizeof(int) * i32); // assume at least 2 byte alignment
|
||||
|
||||
int x32 = 0;
|
||||
x32 |= x16[0] << 0;
|
||||
x32 |= x16[1] << 16;
|
||||
|
||||
return x32;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int get_int_from_uint8(const uint8_t * x8, const int & i32) {
|
||||
const uint16_t * x16 = (const uint16_t *) (x8 + sizeof(int) * i32); // assume at least 2 byte alignment
|
||||
|
||||
int x32 = 0;
|
||||
x32 |= x16[0] << 0;
|
||||
x32 |= x16[1] << 16;
|
||||
|
||||
return x32;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int get_int_from_int8_aligned(const int8_t * x8, const int & i32) {
|
||||
return *((const int *) (x8 + sizeof(int) * i32)); // assume at least 4 byte alignment
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int get_int_from_uint8_aligned(const uint8_t * x8, const int & i32) {
|
||||
return *((const int *) (x8 + sizeof(int) * i32)); // assume at least 4 byte alignment
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int get_int_b2(const void * x, const int & i32) {
|
||||
const uint16_t * x16 = (const uint16_t *) x;
|
||||
const uint16_t * x16 = (const uint16_t *) x; // assume at least 2 byte alignment
|
||||
|
||||
int x32 = x16[2*i32 + 0] << 0;
|
||||
x32 |= x16[2*i32 + 1] << 16;
|
||||
@@ -768,6 +740,7 @@ static __device__ __forceinline__ float vec_dot_q6_K_q8_1(
|
||||
}
|
||||
|
||||
#define VDR_IQ2_XXS_Q8_1_MMVQ 2
|
||||
#define VDR_IQ2_XXS_Q8_1_MMQ 2
|
||||
|
||||
static __device__ __forceinline__ float vec_dot_iq2_xxs_q8_1(
|
||||
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
|
||||
@@ -802,6 +775,7 @@ static __device__ __forceinline__ float vec_dot_iq2_xxs_q8_1(
|
||||
}
|
||||
|
||||
#define VDR_IQ2_XS_Q8_1_MMVQ 2
|
||||
#define VDR_IQ2_XS_Q8_1_MMQ 2
|
||||
|
||||
static __device__ __forceinline__ float vec_dot_iq2_xs_q8_1(
|
||||
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
|
||||
@@ -840,6 +814,7 @@ static __device__ __forceinline__ float vec_dot_iq2_xs_q8_1(
|
||||
}
|
||||
|
||||
#define VDR_IQ2_S_Q8_1_MMVQ 2
|
||||
#define VDR_IQ2_S_Q8_1_MMQ 2
|
||||
|
||||
static __device__ __forceinline__ float vec_dot_iq2_s_q8_1(
|
||||
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
|
||||
@@ -887,6 +862,7 @@ static __device__ __forceinline__ float vec_dot_iq2_s_q8_1(
|
||||
}
|
||||
|
||||
#define VDR_IQ3_XXS_Q8_1_MMVQ 2
|
||||
#define VDR_IQ3_XXS_Q8_1_MMQ 2
|
||||
|
||||
static __device__ __forceinline__ float vec_dot_iq3_xxs_q8_1(
|
||||
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
|
||||
@@ -921,6 +897,7 @@ static __device__ __forceinline__ float vec_dot_iq3_xxs_q8_1(
|
||||
}
|
||||
|
||||
#define VDR_IQ3_S_Q8_1_MMVQ 2
|
||||
#define VDR_IQ3_S_Q8_1_MMQ 2
|
||||
|
||||
// TODO: don't use lookup table for signs
|
||||
static __device__ __forceinline__ float vec_dot_iq3_s_q8_1(
|
||||
@@ -962,6 +939,9 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1(
|
||||
return d * sumi;
|
||||
}
|
||||
|
||||
#define VDR_IQ1_S_Q8_1_MMVQ 1
|
||||
#define VDR_IQ1_S_Q8_1_MMQ 1
|
||||
|
||||
static __device__ __forceinline__ float vec_dot_iq1_s_q8_1(
|
||||
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
|
||||
const block_iq1_s * bq1 = (const block_iq1_s *) vbq + kbx;
|
||||
@@ -992,6 +972,9 @@ static __device__ __forceinline__ float vec_dot_iq1_s_q8_1(
|
||||
return d1q * (ds.x*sumi + ds.y*delta);
|
||||
}
|
||||
|
||||
#define VDR_IQ1_M_Q8_1_MMVQ 1
|
||||
#define VDR_IQ1_M_Q8_1_MMQ 1
|
||||
|
||||
static __device__ __forceinline__ float vec_dot_iq1_m_q8_1(
|
||||
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
|
||||
|
||||
@@ -1051,6 +1034,7 @@ static __device__ __forceinline__ int2 get_int_from_table_16(const int & q4) {
|
||||
}
|
||||
|
||||
#define VDR_IQ4_NL_Q8_1_MMVQ 2
|
||||
#define VDR_IQ4_NL_Q8_1_MMQ 4
|
||||
|
||||
static __device__ __forceinline__ float vec_dot_iq4_nl_q8_1(
|
||||
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
|
||||
@@ -1074,6 +1058,7 @@ static __device__ __forceinline__ float vec_dot_iq4_nl_q8_1(
|
||||
}
|
||||
|
||||
#define VDR_IQ4_XS_Q8_1_MMVQ 4
|
||||
#define VDR_IQ4_XS_Q8_1_MMQ 4
|
||||
|
||||
static __device__ __forceinline__ float vec_dot_iq4_xs_q8_1(
|
||||
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
|
||||
|
||||
Reference in New Issue
Block a user