mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-20 12:07:33 +00:00
cuda : rename build flag to LLAMA_CUDA (#6299)
This commit is contained in:
@@ -124,7 +124,7 @@ llama_print_timings: total time = 34570.79 ms
|
||||
## Orin compile and run
|
||||
### compile
|
||||
```sh
|
||||
make LLAMA_CUBLAS=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32
|
||||
make LLAMA_CUDA=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32
|
||||
```
|
||||
|
||||
### run on Orin
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef GGML_USE_CUBLAS
|
||||
#ifdef GGML_USE_CUDA
|
||||
#include "ggml-cuda.h"
|
||||
#endif
|
||||
|
||||
@@ -968,7 +968,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef GGML_USE_CUBLAS
|
||||
#ifdef GGML_USE_CUDA
|
||||
new_clip->backend = ggml_backend_cuda_init(0);
|
||||
printf("%s: CLIP using CUDA backend\n", __func__);
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user