CANN: ROPE cache sin/cos repeat (#15501)

Signed-off-by: noemotiovon <757486878@qq.com>
This commit is contained in:
Chenguang Li
2025-08-25 10:32:21 +08:00
committed by GitHub
parent 043fb27d38
commit c247d06f38
2 changed files with 135 additions and 88 deletions

View File

@@ -368,10 +368,6 @@ struct ggml_backend_cann_context {
std::string name; /**< Name of the device. */
std::string description; /**< Description of the device. */
aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */
void* init_ptr = nullptr;
void* sin_ptr = nullptr;
void* cos_ptr = nullptr;
int64_t max_prompt_length = 65536;
#ifdef USE_ACL_GRAPH
/// Cached CANN ACL graph used for executing the current ggml computation graph.
std::unique_ptr<ggml_cann_graph> cann_graph;
@@ -379,6 +375,12 @@ struct ggml_backend_cann_context {
cann_task_queue task_queue;
bool async_mode;
bool support_set_rows;
// Rope Cache
void* rope_init_ptr = nullptr;
void* rope_sin_ptr = nullptr;
void* rope_cos_ptr = nullptr;
int64_t max_prompt_length = 0;
// Constant Pool
void* f32_zero_cache = nullptr;
void* f32_one_cache = nullptr;
int64_t f32_zero_cache_element = 0;
@@ -422,14 +424,20 @@ struct ggml_backend_cann_context {
ACL_CHECK(aclrtDestroyStream(streams[i]));
}
}
if(init_ptr != nullptr) {
ACL_CHECK(aclrtFree(init_ptr));
if(rope_init_ptr != nullptr) {
ACL_CHECK(aclrtFree(rope_init_ptr));
}
if(sin_ptr != nullptr) {
ACL_CHECK(aclrtFree(sin_ptr));
if(rope_sin_ptr != nullptr) {
ACL_CHECK(aclrtFree(rope_sin_ptr));
}
if(cos_ptr != nullptr) {
ACL_CHECK(aclrtFree(cos_ptr));
if(rope_cos_ptr != nullptr) {
ACL_CHECK(aclrtFree(rope_cos_ptr));
}
if(f32_zero_cache != nullptr) {
ACL_CHECK(aclrtFree(f32_zero_cache));
}
if(f32_one_cache != nullptr) {
ACL_CHECK(aclrtFree(f32_one_cache));
}
}