mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-04 09:32:00 +00:00
@@ -54,6 +54,7 @@ public:
|
||||
|
||||
uint32_t get_n_tokens() const;
|
||||
uint32_t get_n_outputs() const;
|
||||
uint32_t get_n_used() const;
|
||||
|
||||
// the array of output indices in the order they were encountered during the ubatch splitting
|
||||
std::vector<int32_t> & get_out_ids();
|
||||
@@ -125,6 +126,8 @@ private:
|
||||
// batch indices of the output
|
||||
std::vector<int32_t> out_ids;
|
||||
|
||||
uint32_t n_used;
|
||||
|
||||
// used[i] indicates if token i has already been used in a previous ubatch
|
||||
std::vector<bool> used;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user