diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index c64bf9de93..ed491721d4 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -8847,9 +8847,7 @@ struct llm_build_starcoder2 : public llm_graph_context {
 };
 
 struct llm_build_mamba : public llm_graph_context {
-    const llama_model & model;
-
-    llm_build_mamba(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params), model(model) {
+    llm_build_mamba(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) {
         ggml_tensor * cur;
         ggml_tensor * inpL;
 
@@ -8865,7 +8863,7 @@ struct llm_build_mamba : public llm_graph_context {
                     LLM_NORM_RMS, il);
             cb(cur, "attn_norm", il);
 
-            cur = build_mamba_layer(gf, cur, state_copy, ubatch, il);
+            cur = build_mamba_layer(gf, cur, state_copy, model, ubatch, il);
 
             if (il == n_layer - 1) {
                 // skip computing output for unused tokens
@@ -8906,6 +8904,7 @@
             ggml_cgraph * gf,
             ggml_tensor * cur,
             ggml_tensor * state_copy,
+            const llama_model & model,
             const llama_ubatch & ubatch,
             int il) const {
        const auto * kv_state = static_cast<const llama_kv_cache_recurrent_state *>(mstate);
diff --git a/tests/test-model-random.cpp b/tests/test-model-random.cpp
index 95a03bc2f1..8e95968670 100644
--- a/tests/test-model-random.cpp
+++ b/tests/test-model-random.cpp
@@ -227,7 +227,7 @@ struct gguf_value {
                 for (size_t i = 0; i < arr_size; ++i) {
                     memcpy(data.data() + type_size * i, &(*value.array)[i].value, type_size);
                 }
-                gguf_set_arr_data(ctx, k, arr_type, data.data(), data.size());
+                gguf_set_arr_data(ctx, k, arr_type, data.data(), data.size() / type_size);
             }
             // TODO: handle nested arrays
         }
@@ -317,7 +317,12 @@ struct model_variant {
             gguf_add_tensor(ctx_gguf, tensor);
         }
 
-        return gguf_write_to_file(ctx_gguf, fname, false);
+        bool status = gguf_write_to_file(ctx_gguf, fname, false);
+
+        ggml_free(ctx);
+        gguf_free(ctx_gguf);
+
+        return status;
     }
 
     static void insert_from_arch(std::vector<model_variant> & variants, llm_arch arch) {
@@ -762,9 +767,8 @@ int main(int argc, char ** argv) {
     std::mt19937 rng(42);
 
     // TODO: multiple sequences per token
-    const int64_t n_batch = 2048;
-    const int64_t n_seq_len = 1024;
-    std::uniform_int_distribution rand_seq_init_len(n_seq_len / 4, 3 * n_seq_len / 4);
+    const int32_t n_batch = 2048;
+    const int32_t n_seq_len = 1024;
 
     llama_batch batch = llama_batch_init(n_batch, 0, 1);
     // TODO: batch with embeddings
@@ -794,10 +798,10 @@
     // const auto n_vocab = llama_vocab_n_tokens(llama_model_get_vocab(model));
     // const auto n_embd = llama_model_n_embd(model);
 
-    for (int64_t n_seq_max : { 1, 2, 13 } ) {
+    for (int32_t n_seq_max : { 1, 2, 13 } ) {
 
         // TODO(later): context shift testing
-        for (int64_t n_ctx : { n_seq_len * n_seq_max }) {
+        for (int32_t n_ctx : { n_seq_len * n_seq_max }) {
 
             std::vector<reference_logits> ref_outputs;
 
@@ -824,7 +828,7 @@
 
             for (bool shuffle : { false, true }) {
 
-                for (int64_t n_ubatch : { 1, 2, 512 } ) {
+                for (int32_t n_ubatch : { 1, 2, 512 } ) {
 
                     std::vector<bool> valid(n_seq_max, true);
 
@@ -852,7 +856,7 @@
 
                         if (batch.n_tokens < n_batch) {
                             const int64_t seq_len = std::min(n_batch - batch.n_tokens,
-                                                             (int64_t) ref_outputs[seq_id].prompt_len - seq_id_n_past[seq_id]);
+                                                             ref_outputs[seq_id].prompt_len - seq_id_n_past[seq_id]);
 
                             ref_outputs[seq_id].add_to_batch(batch, seq_id_n_past[seq_id], seq_len, seq_id);
                             seq_ids_in_batch.insert(seq_id);
@@ -891,7 +895,7 @@
                     }
 
                     fprintf(stdout,
"Comparing output for '%s', with shuffle=%i, n_seq_max=%li, n_ctx=%li, n_ubatch=%li: ", + "Comparing output for '%s', with shuffle=%i, n_seq_max=%i, n_ctx=%i, n_ubatch=%i: ", variant.name.c_str(), shuffle, n_seq_max, n_ctx, n_ubatch); if (std::all_of(valid.begin(), valid.end(), [](bool v) { return v; })) { fprintf(stdout, "\033[1;32mOK\033[0m\n");