mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-06 09:46:50 +00:00
bugfix: fix the arch check for qwen3vl-moe.
This commit is contained in:
@@ -3350,7 +3350,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         {
             // for deepstack features, we consider the embd to be [main_embd, deepstack_embd_1, deepstack_embd_2, ...]
             int64_t n_embd = hparams.n_embd;
-            if (arch == LLM_ARCH_QWEN3VL) {
+            if (arch == LLM_ARCH_QWEN3VLMOE) {
                 n_embd = hparams.n_embd / (hparams.n_deepstack_layers + 1);
             }
             tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
Reference in New Issue
Block a user