mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-04 09:32:00 +00:00
cleaning debug quant
This commit is contained in:
@@ -315,11 +315,6 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t
|
|||||||
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
||||||
new_type = GGML_TYPE_IQ2_S;
|
new_type = GGML_TYPE_IQ2_S;
|
||||||
}
|
}
|
||||||
} else if (name.find("ssm_in.weight") != std::string::npos) {
|
|
||||||
// For mamba-based models it's better to not quantize the ssm-proj layers
|
|
||||||
if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S) {
|
|
||||||
new_type = GGML_TYPE_BF16;
|
|
||||||
}
|
|
||||||
} else if (name.find("attn_q.weight") != std::string::npos) {
|
} else if (name.find("attn_q.weight") != std::string::npos) {
|
||||||
if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
|
if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
|
||||||
new_type = GGML_TYPE_IQ3_XXS;
|
new_type = GGML_TYPE_IQ3_XXS;
|
||||||
|
|||||||
Reference in New Issue
Block a user