mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-05 09:36:52 +00:00
mpi : fix inference
This commit is contained in:
10
llama.cpp
10
llama.cpp
@@ -1336,16 +1336,16 @@ static bool llama_eval_internal(
|
||||
struct ggml_tensor * inpL;
|
||||
|
||||
if (tokens) {
|
||||
struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
|
||||
memcpy(embd->data, tokens, N*ggml_element_size(embd));
|
||||
inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
|
||||
struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
|
||||
memcpy(inp_tokens->data, tokens, N*ggml_element_size(inp_tokens));
|
||||
ggml_set_name(inp_tokens, "inp_tokens");
|
||||
|
||||
inpL = ggml_get_rows(ctx0, model.tok_embeddings, inp_tokens);
|
||||
} else {
|
||||
inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
|
||||
memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
|
||||
}
|
||||
|
||||
ggml_set_name(inpL, "embd");
|
||||
|
||||
const int i_gpu_start = n_layer - n_gpu_layers;
|
||||
(void) i_gpu_start;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user