Mirror of https://github.com/ggml-org/llama.cpp.git
RMSE-optimized quants for all quantization types
By default this new option is ON. One can turn it off by setting LLAMA_NO_RMSE. With this option enabled, Q4_3 quantization results in a perplexity of 6.0344, i.e., 0.0273 lower than simple Q4_3 quantization.
Committed by Georgi Gerganov
Parent 0e018fe008
Commit e435bfd93c
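The commit message above summarizes the idea: instead of deriving each block's scale directly from the block's extrema, the quantizer searches for the scale that minimizes the round-trip (quantize/dequantize) error. The following is a minimal illustrative sketch of that idea, not the ggml implementation: it assumes a simplified 4-bit format with one float scale per block of 32 values, and the function name, block size, and candidate-scale grid are all invented for the example.

#include <math.h>
#include <stdint.h>

#define BLOCK_SIZE 32

// Sketch only: pick the per-block scale by minimizing the squared
// reconstruction error instead of using the naive max-based scale.
static float quantize_block_rmse(const float * x, int8_t * q) {
    float amax = 0.0f;
    for (int i = 0; i < BLOCK_SIZE; ++i) {
        const float ax = fabsf(x[i]);
        if (ax > amax) amax = ax;
    }

    float best_d   = amax / 7.0f;   // naive scale: map the largest magnitude to the top 4-bit level
    float best_err = INFINITY;

    // try a small grid of candidate scales around the naive one and keep the
    // one with the smallest round-trip squared error over the block
    for (int is = -4; is <= 4; ++is) {
        const float d = amax / (7.0f + 0.1f*is);
        if (d == 0.0f) {
            break;
        }
        float err = 0.0f;
        for (int i = 0; i < BLOCK_SIZE; ++i) {
            int v = (int) roundf(x[i]/d);
            if (v < -8) v = -8;
            if (v >  7) v =  7;
            const float e = x[i] - d*v;
            err += e*e;
        }
        if (err < best_err) {
            best_err = err;
            best_d   = d;
        }
    }

    // quantize the block with the chosen scale
    for (int i = 0; i < BLOCK_SIZE; ++i) {
        int v = best_d != 0.0f ? (int) roundf(x[i]/best_d) : 0;
        if (v < -8) v = -8;
        if (v >  7) v =  7;
        q[i] = (int8_t) v;
    }

    return best_d; // the per-block scale that would be stored next to the 4-bit values
}

Real formats such as Q4_3 carry additional per-block parameters (a scale and a minimum), so the search there has more degrees of freedom, but the trade-off is the same: a little more work at quantization time in exchange for lower reconstruction error and, as the message reports, lower perplexity.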
@@ -68,6 +68,9 @@ option(LLAMA_ACCELERATE "llama: enable Accelerate framework"
 option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF)
 option(LLAMA_CUBLAS   "llama: use cuBLAS"   OFF)
 
+# RMSE minimization when quantizing
+option(LLAMA_NO_RMSE  "llama: disable RMSE minimization" OFF)
+
 option(LLAMA_BUILD_TESTS    "llama: build tests"    ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
 
@@ -99,6 +102,10 @@ if (NOT MSVC)
     endif()
 endif()
 
+if (LLAMA_NO_RMSE)
+    add_compile_definitions(GGML_NO_RMSE)
+endif()
+
 if (APPLE AND LLAMA_ACCELERATE)
     find_library(ACCELERATE_FRAMEWORK Accelerate)
     if (ACCELERATE_FRAMEWORK)
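With this change, a typical out-of-source build can disable the minimization at configure time, e.g. cmake .. -DLLAMA_NO_RMSE=ON (the exact invocation is not part of the commit, just standard CMake usage). Turning the option on defines GGML_NO_RMSE for the compiled sources, which makes the quantization code fall back to the simple, non-RMSE path described in the commit message.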