initial implementation of delayed graph allocation

This commit is contained in:
slaren
2023-07-20 15:57:48 +02:00
parent cb205c0d13
commit de69f8f20d
6 changed files with 165 additions and 87 deletions

View File

@@ -1752,6 +1752,8 @@ static void ggml_backend_cuda_get_tensor_async(ggml_backend * backend, const ggm
//ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
//printf("get tensor %s %p\n", tensor->name, tensor->data);
CUDA_CHECK(cudaMemcpyAsync(data, (const char*)tensor->data + offset, size, cudaMemcpyDeviceToHost, g_cudaStream_main));
UNUSED(backend);