mpi : fix inference

Georgi Gerganov
2023-07-09 18:26:20 +03:00
parent ef37dd14e7
commit beadbf3380
2 changed files with 46 additions and 30 deletions

llama.cpp

@@ -1336,16 +1336,16 @@ static bool llama_eval_internal(
     struct ggml_tensor * inpL;

     if (tokens) {
-        struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
-        memcpy(embd->data, tokens, N*ggml_element_size(embd));
-        inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
+        struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
+        memcpy(inp_tokens->data, tokens, N*ggml_element_size(inp_tokens));
+        ggml_set_name(inp_tokens, "inp_tokens");
+
+        inpL = ggml_get_rows(ctx0, model.tok_embeddings, inp_tokens);
     } else {
         inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
         memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
     }

+    ggml_set_name(inpL, "embd");
+
     const int i_gpu_start = n_layer - n_gpu_layers;
     (void) i_gpu_start;
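
The substance of the fix is that the graph inputs now carry stable names. ggml can look a tensor up by name in a finished graph via ggml_graph_get_tensor(), which is what makes the names useful to code that never saw graph construction — for example, an MPI backend picking which tensors to exchange between ranks. A minimal sketch of the lookup side, assuming the standard ggml C API (ggml_graph_get_tensor, ggml_set_name, and GGML_ASSERT exist as used; the helper function itself is a hypothetical illustration, not code from this commit):

#include <stddef.h>
#include "ggml.h"

// Hypothetical consumer of the graph built in llama_eval_internal.
// After this commit the graph input is always named "embd", and the raw
// token ids (when tokens are the input) are additionally named "inp_tokens".
static struct ggml_tensor * get_graph_embd(struct ggml_cgraph * gf) {
    struct ggml_tensor * embd = ggml_graph_get_tensor(gf, "embd");
    GGML_ASSERT(embd != NULL && "graph input tensor was not named");
    return embd;
}

Name-based lookup keeps producer and consumer decoupled: the eval code can reorder or insert nodes freely without breaking a consumer, which index-based addressing into the graph would not survive.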