mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-19 11:57:07 +00:00
CANN: implement LRU cache for ACL graphs (#15814)
* CANN: implement LRU cache for ACL graphs in CANN backend - Introduce ggml_cann_graph_lru_cache to store multiple ggml_cann_graph objects. - Graphs are loaded on demand and evicted using LRU policy when capacity is exceeded. - Updated push, move_to_front, and clear methods to manage cached graphs efficiently. - Ensures reuse of graphs, reducing graph reconstruction overhead in CANN backend. * fix typo * The LRU cache capacity can be configured via an env variable Signed-off-by: noemotiovon <757486878@qq.com> * refactory acl graph * refactory && fix review comments Signed-off-by: noemotiovon <757486878@qq.com> --------- Signed-off-by: noemotiovon <757486878@qq.com>
This commit is contained in:
@@ -38,6 +38,7 @@
|
||||
#include <unistd.h>
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <list>
|
||||
|
||||
#include "../include/ggml-cann.h"
|
||||
#include "../include/ggml.h"
|
||||
@@ -106,6 +107,7 @@ int32_t ggml_cann_get_device();
|
||||
|
||||
std::optional<std::string> get_env(const std::string& name);
|
||||
bool parse_bool(const std::string& value);
|
||||
int parse_integer(const std::string& value);
|
||||
|
||||
/**
|
||||
* @brief Abstract base class for memory pools used by CANN.
|
||||
@@ -350,7 +352,7 @@ struct ggml_graph_node_properties {
|
||||
struct ggml_cann_graph {
|
||||
~ggml_cann_graph() {
|
||||
if (graph != nullptr) {
|
||||
aclmdlRIDestroy(graph);
|
||||
ACL_CHECK(aclmdlRIDestroy(graph));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -358,6 +360,64 @@ struct ggml_cann_graph {
|
||||
|
||||
std::vector<ggml_graph_node_properties> ggml_graph_properties;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief LRU cache for managing ggml_cann_graph objects.
|
||||
*
|
||||
* This class maintains a list of shared_ptr to ggml_cann_graph objects
|
||||
* and enforces a maximum capacity. It provides methods to push new graphs,
|
||||
* move existing graphs to the front (most recently used), and clear the cache.
|
||||
*/
|
||||
struct ggml_cann_graph_lru_cache {
|
||||
size_t capacity; /**< Maximum number of graphs in the cache. */
|
||||
|
||||
std::list<ggml_cann_graph*> cache_list; /**< List storing cached graphs as raw pointers. */
|
||||
|
||||
ggml_cann_graph_lru_cache() {
|
||||
capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12"));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Push a new graph to the front of the cache.
|
||||
* If the cache exceeds capacity, the least recently used graph is deleted.
|
||||
* @param new_node Pointer to the new ggml_cann_graph to cache.
|
||||
* Ownership is transferred to the cache (cache will delete it).
|
||||
*/
|
||||
void push(ggml_cann_graph* new_node) {
|
||||
if (cache_list.size() >= capacity) {
|
||||
ggml_cann_graph* old = cache_list.back();
|
||||
cache_list.pop_back();
|
||||
delete old; // free the old graph
|
||||
}
|
||||
cache_list.push_front(new_node);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Move an existing graph to the front of the cache.
|
||||
* @param node Pointer to the ggml_cann_graph to move.
|
||||
*/
|
||||
void move_to_front(ggml_cann_graph* node) {
|
||||
cache_list.remove(node);
|
||||
cache_list.push_front(node);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Clear all graphs from the cache (also frees memory).
|
||||
*/
|
||||
void clear() {
|
||||
for (auto ptr : cache_list) {
|
||||
delete ptr;
|
||||
}
|
||||
cache_list.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destructor that clears the cache and frees all cached graphs.
|
||||
*/
|
||||
~ggml_cann_graph_lru_cache() {
|
||||
clear();
|
||||
}
|
||||
};
|
||||
#endif // USE_ACL_GRAPH
|
||||
|
||||
struct ggml_cann_rope_cache {
|
||||
@@ -394,7 +454,7 @@ struct ggml_backend_cann_context {
|
||||
aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */
|
||||
#ifdef USE_ACL_GRAPH
|
||||
/// Cached CANN ACL graph used for executing the current ggml computation graph.
|
||||
std::unique_ptr<ggml_cann_graph> cann_graph;
|
||||
ggml_cann_graph_lru_cache graph_lru_cache;
|
||||
bool acl_graph_mode = true;
|
||||
#endif
|
||||
cann_task_queue task_queue;
|
||||
|
||||
Reference in New Issue
Block a user