Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-11-10 10:27:03 +00:00
quantize : assume the neutral prior is equal imatrix weights
This commit is contained in the branch/tag listed on the original page (truncated in this extraction).
@@ -291,8 +291,15 @@ static int load_imatrix(const std::string & imatrix_file, std::vector<std::strin
|
||||
for (int64_t j = 0; j < ne1; ++j) {
|
||||
const float count = ((const float *) counts->data)[j];
|
||||
if (count > 0.0f) {
|
||||
float sumw = 0.0f;
|
||||
for (int64_t i = 0; i < ne0; ++i) {
|
||||
e[j*ne0 + i] = (((const float *) sums->data)[j*ne0 + i] + prior_weight) / (count + prior_weight);
|
||||
sumw += ((const float *) sums->data)[j*ne0 + i];
|
||||
}
|
||||
// the neutral prior is equal weights, and it should reduce the variance by weighted-averaging with the mean
|
||||
const float prior_value = sumw / ne0;
|
||||
|
||||
for (int64_t i = 0; i < ne0; ++i) {
|
||||
e[j*ne0 + i] = (((const float *) sums->data)[j*ne0 + i] + prior_value * prior_weight) / (count + prior_weight);
|
||||
}
|
||||
} else {
|
||||
// Partial imatrix data, this tensor never got any input during calibration
|
||||
|
||||
Reference in New Issue
Block a user