mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
CANN: Optimize ggml_cann_set_device (#15935)
* CANN: Fix ggml_cann_set_device to avoid redundant device switches
  - Added a check to skip aclrtSetDevice if the current device is already set.
  - Prevents unnecessary context switches while keeping thread/device consistency.
* CANN: add device default id
This commit is contained in:
@@ -526,7 +526,10 @@ struct ggml_backend_cann_context {
|
|||||||
*/
|
*/
|
||||||
aclrtStream stream(int stream) {
|
aclrtStream stream(int stream) {
|
||||||
if (streams[stream] == nullptr) {
|
if (streams[stream] == nullptr) {
|
||||||
ggml_cann_set_device(device);
|
// If the device is not set here, destroying the stream later may cause a mismatch
|
||||||
|
// between the thread contexts where the stream was created and destroyed.
|
||||||
|
// However, I printed the device_id, thread_id, and stream, and they are all consistent.
|
||||||
|
ACL_CHECK(aclrtSetDevice(device));
|
||||||
ACL_CHECK(aclrtCreateStream(&streams[stream]));
|
ACL_CHECK(aclrtCreateStream(&streams[stream]));
|
||||||
}
|
}
|
||||||
return streams[stream];
|
return streams[stream];
|
||||||
|
|||||||
@@ -75,13 +75,12 @@
|
|||||||
* @param device The device ID to set.
|
* @param device The device ID to set.
|
||||||
*/
|
*/
|
||||||
void ggml_cann_set_device(const int32_t device) {
|
void ggml_cann_set_device(const int32_t device) {
|
||||||
// TODO: uncomment these lines after empty context has fixed.
|
int current_device = -1;
|
||||||
// int current_device;
|
aclrtGetDevice(&current_device);
|
||||||
// ACL_CHECK(aclrtGetDevice(&current_device));
|
|
||||||
|
|
||||||
// if (device == current_device) {
|
if (device == current_device) {
|
||||||
// return;
|
return;
|
||||||
// }
|
}
|
||||||
ACL_CHECK(aclrtSetDevice(device));
|
ACL_CHECK(aclrtSetDevice(device));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1729,6 +1728,7 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|||||||
ggml_cann_get_rows(ctx, dst);
|
ggml_cann_get_rows(ctx, dst);
|
||||||
break;
|
break;
|
||||||
case GGML_OP_SET_ROWS:
|
case GGML_OP_SET_ROWS:
|
||||||
|
std::cout << "lcg GGML_OP_SET_ROWS"<< std::endl;
|
||||||
ggml_cann_set_rows(ctx, dst);
|
ggml_cann_set_rows(ctx, dst);
|
||||||
break;
|
break;
|
||||||
case GGML_OP_DUP:
|
case GGML_OP_DUP:
|
||||||
|
|||||||
Reference in New Issue
Block a user