RMSE-optimized quants for all quantization types

RMSE minimization is enabled by default. It can be turned off
by setting LLAMA_NO_RMSE.

With this option enabled, Q4_3 quantization results
in a perplexity of 6.0344, which is 0.0273 lower than with
simple Q4_3 quantization.
This commit is contained in:
Iwan Kawrakow
2023-04-21 10:26:49 +02:00
committed by Georgi Gerganov
parent 0e018fe008
commit e435bfd93c
3 changed files with 286 additions and 80 deletions

View File

@@ -68,6 +68,9 @@ option(LLAMA_ACCELERATE "llama: enable Accelerate framework"
# Optional BLAS backends; both default to OFF.
option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF)
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
# RMSE minimization when quantizing.
# Note the inverted sense: this is an opt-out switch. It defaults to OFF,
# which leaves RMSE minimization enabled; set it to ON to disable the feature.
option(LLAMA_NO_RMSE "llama: disable RMSE minimization" OFF)
# Tests and examples take their default from LLAMA_STANDALONE.
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
@@ -99,6 +102,10 @@ if (NOT MSVC)
endif()
endif()
# Forward the LLAMA_NO_RMSE build option to the compiler as the GGML_NO_RMSE
# preprocessor definition. add_compile_definitions() is directory-scoped, so
# the definition applies to the targets of this directory rather than to one
# specific target.
# NOTE(review): presumably the sources test GGML_NO_RMSE to compile out the
# RMSE-optimized quantization path -- the consuming code is not visible here.
if (LLAMA_NO_RMSE)
add_compile_definitions(GGML_NO_RMSE)
endif()
if (APPLE AND LLAMA_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate)
if (ACCELERATE_FRAMEWORK)