mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-08 10:07:01 +00:00
sycl: use oneDNN for matrices multiplication (#12972)
This commit is contained in:
committed by
GitHub
parent
6c8b91500e
commit
9c404ed54c
@@ -32,16 +32,36 @@ public:
|
||||
else static_assert(0);
|
||||
}
|
||||
|
||||
static inline void row_gemm(ggml_backend_sycl_context & ctx, bool a_trans, bool b_trans, int m, int n, int k,
|
||||
const void * a, dt at, const void * b, dt bt, void * c, dt ct, const queue_ptr & q) {
|
||||
// matrix A has m rows, k columns
|
||||
// matrix B has k rows, n columns
|
||||
// nra - number of elements to skip when moving into next row in A
|
||||
// nrb - number of elements to skip when moving into next row in B
|
||||
// nca - number of elements to skip when moving into next column in A
|
||||
// ncb - number of elements to skip when moving into next column in B
|
||||
// stride_a - number of elements to skip when moving to next A matrix
|
||||
// stride_b - number of elements to skip when moving to next B matrix
|
||||
// batches_a - number of A matrices
|
||||
// batches_b - number of B matrices
|
||||
static void gemm(ggml_backend_sycl_context & ctx, int m, int n, int k,
|
||||
const void * a, dt at, dnnl_dim_t nra, dnnl_dim_t nca, dnnl_dim_t stride_a,
|
||||
const void * b, dt bt, dnnl_dim_t nrb, dnnl_dim_t ncb, dnnl_dim_t stride_b,
|
||||
void * c, dt ct, const queue_ptr & q, dnnl_dim_t batches_a, dnnl_dim_t batches_b) {
|
||||
|
||||
auto stream = ctx.stream_dnnl(q);
|
||||
auto eng = ctx.engine_dnnl(q);
|
||||
dnnl::memory::dims a_dims = { m, k };
|
||||
dnnl::memory::dims b_dims = { k, n };
|
||||
dnnl::memory::dims c_dims = { m, n };
|
||||
const auto a_in_md = dnnl::memory::desc(a_dims, at, a_trans ? tag::ba : tag::ab);
|
||||
const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_trans ? tag::ba : tag::ab);
|
||||
const auto c_md = dnnl::memory::desc(c_dims, ct, tag::ab);
|
||||
|
||||
// { # strides, # rows, # columns }
|
||||
dnnl::memory::dims a_dims = { batches_a, m, k };
|
||||
dnnl::memory::dims b_dims = { batches_b, k, n };
|
||||
dnnl::memory::dims c_dims = { std::max(batches_a, batches_b), m, n };
|
||||
|
||||
// { # elements to skip to next stride, # elements to skip to next row, # elements to skip to next column }
|
||||
dnnl::memory::dims a_strides = { stride_a, nra, nca };
|
||||
dnnl::memory::dims b_strides = { stride_b, nrb, ncb };
|
||||
|
||||
const auto a_in_md = dnnl::memory::desc(a_dims, at, a_strides);
|
||||
const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_strides);
|
||||
const auto c_md = dnnl::memory::desc(c_dims, ct, tag::abc);
|
||||
|
||||
dnnl::primitive_attr primitive_attr;
|
||||
primitive_attr.set_scratchpad_mode(dnnl::scratchpad_mode::user);
|
||||
@@ -63,6 +83,15 @@ public:
|
||||
|
||||
matmul_prim.execute(stream, matmul_args);
|
||||
}
|
||||
|
||||
// matrices A and B are column major, both having k rows
|
||||
// matrix A has m column, matrix B has n columns
|
||||
// output: column major matrix C = A transposed * B
|
||||
static void row_gemm(ggml_backend_sycl_context & ctx, int m, int n, int k,
|
||||
const void * a, dt at, const void * b, dt bt, void * c, dt ct, const queue_ptr & q) {
|
||||
|
||||
gemm(ctx, m, n, k, a, at, k, 1, k * m, b, bt, 1, k, n * k, c, ct, q, 1, 1);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user