context : minor naming fix
@@ -189,7 +189,7 @@ bool llama_context::apply_adapter_cvec(
     return cvec.apply(model, data, len, n_embd, il_start, il_end);
 }
 
-llama_perf_context_data llama_context::get_perf() const {
+llama_perf_context_data llama_context::perf_get_data() const {
     llama_perf_context_data data = {};
 
     data.t_start_ms = 1e-3 * t_start_us;
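The `data.t_start_ms = 1e-3 * t_start_us` line reflects the convention of tracking time in microseconds internally and converting to milliseconds only when reporting. A minimal sketch of that pattern, where `now_us()` is a hypothetical stand-in for the library's own timer (ggml provides `ggml_time_us()` for this); only the `1e-3` conversion is taken from the hunk above:

// Sketch: microsecond timestamps internally, milliseconds in reported data.
#include <chrono>
#include <cstdint>
#include <cstdio>

static int64_t now_us() {
    using namespace std::chrono;
    return duration_cast<microseconds>(
        steady_clock::now().time_since_epoch()).count();
}

struct perf_data {
    double t_start_ms = 0.0; // reported in milliseconds
};

int main() {
    const int64_t t_start_us = now_us(); // stored with microsecond precision
    perf_data data;
    data.t_start_ms = 1e-3 * t_start_us; // us -> ms, as in the hunk above
    printf("t_start_ms = %.3f\n", data.t_start_ms);
    return 0;
}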
@@ -94,7 +94,6 @@ struct llama_context {
     //
     virtual int decode(llama_batch & inp_batch) = 0;
 
-
     // encode a batch of tokens by evaluating the encoder part of the transformer
     //
     // - lctx: llama context
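The `= 0` on `decode` makes `llama_context` an abstract base: every concrete context (such as `llama_context_unified` further down) must supply its own implementation. A stripped-down sketch of that relationship; `my_batch` and the trivial method body are hypothetical, only the pure-virtual declaration mirrors the diff:

// Sketch of the abstract-interface pattern used by llama_context.
struct my_batch {
    int n_tokens = 0;
};

struct context_base {
    virtual ~context_base() = default;
    virtual int decode(my_batch & inp_batch) = 0; // pure virtual: must be overridden
};

struct context_unified : public context_base {
    int decode(my_batch & inp_batch) override {
        return inp_batch.n_tokens > 0 ? 0 : -1; // placeholder logic
    }
};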
@@ -296,7 +295,7 @@ struct llama_context {
 
     // perf
 
-    virtual llama_perf_context_data get_perf() const;
+    virtual llama_perf_context_data perf_get_data() const;
     virtual void perf_reset();
 
     // members
@@ -326,20 +325,21 @@ protected:
 
     bool has_evaluated_once = false;
 
-    mutable int64_t t_start_us;
-    mutable int64_t t_load_us;
+    mutable int64_t t_start_us = 0;
+    mutable int64_t t_load_us = 0;
     mutable int64_t t_p_eval_us = 0;
     mutable int64_t t_eval_us = 0;
 
     mutable int64_t t_compute_start_us = 0;
     mutable int64_t n_queued_tokens = 0;
 
     mutable int32_t n_p_eval = 0; // number of tokens in eval calls for the prompt (with batch size > 1)
     mutable int32_t n_eval = 0; // number of eval calls
 };
 
 // TODO: make implementation details private
-struct llama_context_unified : public llama_context {
+class llama_context_unified : public llama_context {
+public:
     struct batch_manager;
 
     // TODO: tmp until llama-model starts implementing the graph build function
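Two C++ details in this hunk are worth spelling out. First, `struct` members are public by default while `class` members are private by default, so switching `llama_context_unified` to `class` requires the explicit `public:` to keep `batch_manager` and the rest of the interface visible. Second, the new `= 0` default member initializers guarantee the timing counters start zeroed, and `mutable` is what lets a `const` accessor like `perf_get_data()` coexist with counters updated along const paths. A small self-contained illustration (all names hypothetical):

#include <cstdint>

// struct: members public by default.
struct s_example { int x = 0; };

// class: members private by default, so an explicit public: is needed.
class c_example {
public:
    int x = 0;
};

// mutable members may be modified even through a const method,
// which is how const perf accessors can still bump counters.
class timed {
public:
    int64_t get_count() const {
        ++n_calls; // legal only because n_calls is mutable
        return n_calls;
    }
private:
    mutable int64_t n_calls = 0; // default member initializer: starts at 0
};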
@@ -54,11 +54,11 @@ enum llm_norm_type {
 };
 
 struct llm_build_context {
     llama_context & lctx;
     const llama_model & model;
     const llama_hparams & hparams;
     const llama_cparams & cparams;
     const llama_ubatch & ubatch;
 
     const int64_t n_embd;
     const int64_t n_layer;
@@ -7854,7 +7854,7 @@ struct llama_perf_context_data llama_perf_context(const struct llama_context * c
         return data;
     }
 
-    data = ctx->get_perf();
+    data = ctx->perf_get_data();
 
     return data;
 }
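The rename is internal only: the public C API entry point `llama_perf_context()`, which wraps `perf_get_data()` in the hunk above, keeps its name. A hedged usage sketch, assuming `ctx` is a valid context that has already decoded some tokens (field names taken from `llama_perf_context_data` in llama.h):

// Sketch: reading perf counters through the public API.
#include "llama.h"
#include <cstdio>

void print_perf(const llama_context * ctx) {
    const llama_perf_context_data d = llama_perf_context(ctx);
    printf("load:        %.2f ms\n", d.t_load_ms);
    printf("prompt eval: %.2f ms over %d tokens\n", d.t_p_eval_ms, d.n_p_eval);
    printf("eval:        %.2f ms over %d runs\n", d.t_eval_ms, d.n_eval);
}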