mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-28 08:31:25 +00:00
CUDA: mul_mat_q RDNA2 tunings (#2910)
* CUDA: mul_mat_q RDNA2 tunings * Update ggml-cuda.cu Co-authored-by: Henri Vasserman <henv@hot.ee> --------- Co-authored-by: Henri Vasserman <henv@hot.ee>
This commit is contained in:
1
Makefile
1
Makefile
@@ -408,7 +408,6 @@ ifdef LLAMA_HIPBLAS
|
||||
HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
|
||||
HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
|
||||
HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||
HIPFLAGS += -DCC_TURING=1000000000
|
||||
ifdef LLAMA_CUDA_FORCE_DMMV
|
||||
HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
|
||||
endif # LLAMA_CUDA_FORCE_DMMV
|
||||
|
||||
Reference in New Issue
Block a user