	imatrix : use a single count for dense 3d tensors
@@ -112,13 +112,6 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
     const char * data = is_host ? (const char *) src1->data : m_src1_data.data();
     GGML_ASSERT(src1->nb[0] == ggml_element_size(src1));
 
-    // TODO: 4d? (is that even used in practice?)
-    // the extra dimension would need to be stored somewhere to be reflected in the imatrix file
-    if (ggml_nrows(src1) != src1->ne[1] * src1->ne[2]) {
-        LOG_ERR("%s: tensor has more than 3 dimensions: %s", __func__, wname.c_str());
-        GGML_ASSERT(false);
-    }
-
     // this has been adapted to the new format of storing merged experts in a single 3d tensor
     // ref: https://github.com/ggml-org/llama.cpp/pull/6387
     if (t->op == GGML_OP_MUL_MAT_ID) {
@@ -134,6 +127,13 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
 
         GGML_ASSERT(ids->ne[1] == src1->ne[2]);
 
+        // TODO: 4d? (is that even used in practice?)
+        // the extra dimension would need to be stored somewhere to be reflected in the imatrix file
+        if (ggml_nrows(src1) != src1->ne[1] * src1->ne[2]) {
+            LOG_ERR("%s: tensor has more than 3 dimensions: %s", __func__, wname.c_str());
+            GGML_ASSERT(false);
+        }
+
         m_ids.resize(ggml_nbytes(ids));
         ggml_backend_tensor_get(ids, m_ids.data(), 0, ggml_nbytes(ids));
 
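Note on the two hunks above: the guard that rejects tensors with more than 3 dimensions is moved out of the common path and into the GGML_OP_MUL_MAT_ID (merged-experts) branch; the dense path further down folds ne[2] and ne[3] into n_mat, so it appears to no longer need this check. A minimal standalone sketch of the condition the guard enforces, with a hypothetical Tensor4D struct standing in for ggml_tensor (nrows() mirrors what ggml_nrows computes):

    #include <cstdint>

    // Hypothetical stand-in for ggml_tensor: ne[] holds the size of each dimension.
    struct Tensor4D {
        int64_t ne[4];
        // Number of "rows": the product of all dimensions except the first.
        int64_t nrows() const { return ne[1] * ne[2] * ne[3]; }
    };

    // True when the tensor is at most 3-dimensional (ne[3] == 1), i.e. the same
    // condition checked by `ggml_nrows(src1) != src1->ne[1] * src1->ne[2]` above.
    static bool is_at_most_3d(const Tensor4D & t) {
        return t.nrows() == t.ne[1] * t.ne[2];
    }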
@@ -199,19 +199,33 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         auto & e = m_stats[wname];
         const int64_t n_mat = src1->ne[2] * src1->ne[3];
+
+        // use a single count per dense tensor
+        if ((int64_t) e.counts.size() == n_mat) {
+            bool all_equal = true;
+            for (size_t i = 1; i < e.counts.size(); ++i) {
+                if (e.counts[0] != e.counts[i]) {
+                    all_equal = false;
+                    break;
+                }
+            }
+            if (all_equal) {
+                e.counts.resize(1);
+            }
+        }
         if (e.values.empty()) {
             e.values.resize(src1->ne[0] * n_mat, 0);
-            e.counts.resize(n_mat, 0);
+            e.counts.resize(1, 0);
         }
         else if (e.values.size() != (size_t)(src1->ne[0] * n_mat)) {
             LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)(src1->ne[0] * n_mat));
             exit(1); //GGML_ABORT("fatal error");
         }
-        else if (e.counts.size() != (size_t)n_mat) {
-            LOG_ERR("%s: inconsistent expert count for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.counts.size(), (int)n_mat);
+        else if (e.counts.size() != 1) {
+            LOG_ERR("%s: inconsistent matrix count for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.counts.size(), 1);
             exit(1); //GGML_ABORT("fatal error");
         }
         LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d x %5d, %d\n", __func__, m_last_chunk, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->ne[2], (int)src1->type);
 
         for (int64_t i3 = 0; i3 < src1->ne[3]; ++i3) {
             for (int64_t i2 = 0; i2 < src1->ne[2]; ++i2) {
                 const int64_t mat_id = i3 * src1->ne[2] + i2;
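The block added in the hunk above is what makes previously collected per-matrix counts compatible with the new single-count layout: if an existing entry still carries one count per matrix and they are all identical, only one of them needs to be kept. A standalone sketch of that consolidation, using a hypothetical Stats struct in place of the collector's internal state:

    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for the per-tensor statistics entry.
    struct Stats {
        std::vector<float>   values; // accumulated squared activations, one block per matrix
        std::vector<int64_t> counts; // per-matrix counts, or a single shared count
    };

    // Collapse per-matrix counts into a single count when they are all equal,
    // mirroring the "use a single count per dense tensor" hunk above.
    static void collapse_counts(Stats & e, int64_t n_mat) {
        if ((int64_t) e.counts.size() != n_mat) {
            return; // already collapsed (size 1) or not yet initialized
        }
        for (size_t i = 1; i < e.counts.size(); ++i) {
            if (e.counts[i] != e.counts[0]) {
                return; // counts differ per matrix; keep them as they are
            }
        }
        e.counts.resize(1);
    }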
@@ -219,7 +233,6 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
 
                 for (int64_t row = 0; row < src1->ne[1]; ++row) {
                     const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->ne[3]);
-                    e.counts[mat_id]++;
                     for (int64_t j = 0; j < src1->ne[0]; ++j) {
                         e.values[mat_start + j] += x[j] * x[j];
                         if (!std::isfinite((float)e.values[j])) {
@@ -228,17 +241,18 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                         }
                     }
                 }
-                const int32_t n_chunk = e.counts[mat_id] / chunk_size;
-                if (n_chunk > m_last_chunk) {
-                    const int32_t chunk_step = n_chunk - m_last_chunk;
-                    m_last_chunk = n_chunk;
-                    if ((m_last_chunk % m_params.n_out_freq) / chunk_step == 0) {
-                        save_imatrix();
-                    }
-                    if (m_params.n_save_freq > 0 && (m_last_chunk % m_params.n_save_freq) / chunk_step == 0) {
-                        save_imatrix(m_last_chunk);
-                    }
-                }
+            }
+        }
+        e.counts[0] += src1->ne[1];
+        const int32_t n_chunk = e.counts[0] / chunk_size;
+        if (n_chunk > m_last_chunk) {
+            const int32_t chunk_step = n_chunk - m_last_chunk;
+            m_last_chunk = n_chunk;
+            if ((m_last_chunk % m_params.n_out_freq) / chunk_step == 0) {
+                save_imatrix();
+            }
+            if (m_params.n_save_freq > 0 && (m_last_chunk % m_params.n_save_freq) / chunk_step == 0) {
+                save_imatrix(m_last_chunk);
+            }
             }
         }
     }
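With a single count per dense tensor, the chunk bookkeeping in the last two hunks moves out of the per-matrix loops: the count is bumped once per call by the number of rows (src1->ne[1]), and the save triggers are derived from that one counter. A minimal sketch of the accounting, with made-up names (ChunkTracker, add_rows, on_save) standing in for the collector's members and save_imatrix calls:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical reduced model of the chunk accounting after this commit.
    struct ChunkTracker {
        int64_t count      = 0;   // e.counts[0]: rows seen so far for this tensor
        int32_t last_chunk = 0;   // m_last_chunk
        int32_t chunk_size = 512; // rows per chunk (a parameter of the tool)
        int32_t n_out_freq = 10;  // write the imatrix every N chunks

        void on_save(int32_t chunk) { std::printf("save at chunk %d\n", chunk); }

        // Called once per collected matmul with the number of rows in src1.
        void add_rows(int64_t n_rows) {
            count += n_rows;                                     // e.counts[0] += src1->ne[1];
            const int32_t n_chunk = (int32_t)(count / chunk_size);
            if (n_chunk > last_chunk) {
                const int32_t chunk_step = n_chunk - last_chunk; // may advance several chunks at once
                last_chunk = n_chunk;
                if ((last_chunk % n_out_freq) / chunk_step == 0) {
                    on_save(last_chunk);                         // save_imatrix();
                }
            }
        }
    };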