mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-13 10:57:15 +00:00
Add buffer label and enable dawn-specific toggles to turn off some checks
This commit is contained in:
@@ -309,10 +309,12 @@ struct ggml_backend_webgpu_context {
|
|||||||
struct ggml_backend_webgpu_buffer_context {
|
struct ggml_backend_webgpu_buffer_context {
|
||||||
webgpu_context webgpu_ctx;
|
webgpu_context webgpu_ctx;
|
||||||
wgpu::Buffer buffer;
|
wgpu::Buffer buffer;
|
||||||
|
std::string label;
|
||||||
|
|
||||||
ggml_backend_webgpu_buffer_context(webgpu_context ctx, wgpu::Buffer buf) :
|
ggml_backend_webgpu_buffer_context(webgpu_context ctx, wgpu::Buffer buf, std::string lbl) :
|
||||||
webgpu_ctx(std::move(ctx)),
|
webgpu_ctx(std::move(ctx)),
|
||||||
buffer(std::move(buf)) {}
|
buffer(std::move(buf)),
|
||||||
|
label(std::move(lbl)) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* End struct definitions */
|
/* End struct definitions */
|
||||||
@@ -1336,11 +1338,11 @@ static void ggml_backend_webgpu_buffer_memset_tensor(ggml_backend_buffer_t buffe
|
|||||||
|
|
||||||
WEBGPU_CPU_PROFILE_TOTAL_START(memset_tensor);
|
WEBGPU_CPU_PROFILE_TOTAL_START(memset_tensor);
|
||||||
|
|
||||||
WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_memset_tensor(" << buffer << ", " << tensor << ", " << value << ", "
|
|
||||||
<< offset << ", " << size << ")");
|
|
||||||
|
|
||||||
ggml_backend_webgpu_buffer_context * buf_ctx = (ggml_backend_webgpu_buffer_context *) buffer->context;
|
ggml_backend_webgpu_buffer_context * buf_ctx = (ggml_backend_webgpu_buffer_context *) buffer->context;
|
||||||
|
|
||||||
|
WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_memset_tensor(" << buf_ctx->label << ", " << tensor << ", " << value
|
||||||
|
<< ", " << offset << ", " << size << ")");
|
||||||
|
|
||||||
size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset;
|
size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset;
|
||||||
|
|
||||||
// This is a trick to set all bytes of a u32 to the same 1 byte value.
|
// This is a trick to set all bytes of a u32 to the same 1 byte value.
|
||||||
@@ -1354,12 +1356,13 @@ static void ggml_backend_webgpu_buffer_set_tensor(ggml_backend_buffer_t buffer,
|
|||||||
const void * data,
|
const void * data,
|
||||||
size_t offset,
|
size_t offset,
|
||||||
size_t size) {
|
size_t size) {
|
||||||
WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", "
|
|
||||||
<< offset << ", " << size << ")");
|
|
||||||
WEBGPU_CPU_PROFILE_TOTAL_START(set_tensor);
|
WEBGPU_CPU_PROFILE_TOTAL_START(set_tensor);
|
||||||
ggml_backend_webgpu_buffer_context * buf_ctx = (ggml_backend_webgpu_buffer_context *) buffer->context;
|
ggml_backend_webgpu_buffer_context * buf_ctx = (ggml_backend_webgpu_buffer_context *) buffer->context;
|
||||||
webgpu_context webgpu_ctx = buf_ctx->webgpu_ctx;
|
webgpu_context webgpu_ctx = buf_ctx->webgpu_ctx;
|
||||||
|
|
||||||
|
WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_set_tensor(" << buf_ctx->label << ", " << tensor << ", " << data
|
||||||
|
<< ", " << offset << ", " << size << ")");
|
||||||
|
|
||||||
size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset;
|
size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset;
|
||||||
|
|
||||||
webgpu_ctx->queue.WriteBuffer(buf_ctx->buffer, total_offset, data, (size / 4) * 4);
|
webgpu_ctx->queue.WriteBuffer(buf_ctx->buffer, total_offset, data, (size / 4) * 4);
|
||||||
@@ -1397,12 +1400,12 @@ static void ggml_backend_webgpu_buffer_get_tensor(ggml_backend_buffer_t buffer,
|
|||||||
void * data,
|
void * data,
|
||||||
size_t offset,
|
size_t offset,
|
||||||
size_t size) {
|
size_t size) {
|
||||||
WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", "
|
|
||||||
<< offset << ", " << size << ")");
|
|
||||||
WEBGPU_CPU_PROFILE_TOTAL_START(get_tensor);
|
WEBGPU_CPU_PROFILE_TOTAL_START(get_tensor);
|
||||||
ggml_backend_webgpu_buffer_context * buf_ctx = (ggml_backend_webgpu_buffer_context *) buffer->context;
|
ggml_backend_webgpu_buffer_context * buf_ctx = (ggml_backend_webgpu_buffer_context *) buffer->context;
|
||||||
webgpu_context webgpu_ctx = buf_ctx->webgpu_ctx;
|
WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_get_tensor(" << buf_ctx->label << ", " << tensor << ", " << data
|
||||||
wgpu::Device device = webgpu_ctx->device;
|
<< ", " << offset << ", " << size << ")");
|
||||||
|
webgpu_context webgpu_ctx = buf_ctx->webgpu_ctx;
|
||||||
|
wgpu::Device device = webgpu_ctx->device;
|
||||||
|
|
||||||
size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset;
|
size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset;
|
||||||
|
|
||||||
@@ -1473,16 +1476,20 @@ static const char * ggml_backend_webgpu_buffer_type_get_name(ggml_backend_buffer
|
|||||||
|
|
||||||
static ggml_backend_buffer_t ggml_backend_webgpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
|
static ggml_backend_buffer_t ggml_backend_webgpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
|
||||||
size_t size) {
|
size_t size) {
|
||||||
WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_type_alloc_buffer(" << size << ")");
|
static std::atomic<int> buffer_count;
|
||||||
|
int buffer_id = buffer_count++;
|
||||||
|
std::string buf_name = "tensor_buf" + std::to_string(buffer_id);
|
||||||
|
WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_type_alloc_buffer_" << buffer_id << ": " << size << " bytes");
|
||||||
ggml_backend_webgpu_device_context * ctx = static_cast<ggml_backend_webgpu_device_context *>(buft->device->context);
|
ggml_backend_webgpu_device_context * ctx = static_cast<ggml_backend_webgpu_device_context *>(buft->device->context);
|
||||||
|
|
||||||
wgpu::Buffer buf;
|
wgpu::Buffer buf;
|
||||||
ggml_webgpu_create_buffer(ctx->webgpu_ctx->device, buf,
|
ggml_webgpu_create_buffer(ctx->webgpu_ctx->device, buf,
|
||||||
(size + WEBGPU_STORAGE_BUF_BINDING_MULT - 1) & ~(WEBGPU_STORAGE_BUF_BINDING_MULT - 1),
|
(size + WEBGPU_STORAGE_BUF_BINDING_MULT - 1) & ~(WEBGPU_STORAGE_BUF_BINDING_MULT - 1),
|
||||||
wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst,
|
wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst,
|
||||||
"allocated_buffer");
|
buf_name.c_str());
|
||||||
|
|
||||||
ggml_backend_webgpu_buffer_context * buf_ctx = new ggml_backend_webgpu_buffer_context(ctx->webgpu_ctx, buf);
|
ggml_backend_webgpu_buffer_context * buf_ctx =
|
||||||
|
new ggml_backend_webgpu_buffer_context(ctx->webgpu_ctx, buf, buf_name);
|
||||||
|
|
||||||
return ggml_backend_buffer_init(buft, ggml_backend_webgpu_buffer_interface, buf_ctx, size);
|
return ggml_backend_buffer_init(buft, ggml_backend_webgpu_buffer_interface, buf_ctx, size);
|
||||||
}
|
}
|
||||||
@@ -2129,6 +2136,15 @@ static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t
|
|||||||
required_features.push_back(wgpu::FeatureName::TimestampQuery);
|
required_features.push_back(wgpu::FeatureName::TimestampQuery);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
const char * const deviceEnabledToggles[] = { "skip_validation", "disable_robustness", "disable_workgroup_init",
|
||||||
|
"disable_polyfills_on_integer_div_and_mod" };
|
||||||
|
const char * const deviceDisabledToggles[] = { "timestamp_quantization" };
|
||||||
|
wgpu::DawnTogglesDescriptor deviceTogglesDesc;
|
||||||
|
deviceTogglesDesc.enabledToggles = deviceEnabledToggles;
|
||||||
|
deviceTogglesDesc.enabledToggleCount = 4;
|
||||||
|
deviceTogglesDesc.disabledToggles = deviceDisabledToggles;
|
||||||
|
deviceTogglesDesc.disabledToggleCount = 1;
|
||||||
|
|
||||||
wgpu::DeviceDescriptor dev_desc;
|
wgpu::DeviceDescriptor dev_desc;
|
||||||
dev_desc.requiredLimits = &ctx->limits;
|
dev_desc.requiredLimits = &ctx->limits;
|
||||||
dev_desc.requiredFeatures = required_features.data();
|
dev_desc.requiredFeatures = required_features.data();
|
||||||
@@ -2146,6 +2162,7 @@ static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t
|
|||||||
GGML_ABORT("ggml_webgpu: Device error! Reason: %d, Message: %s\n", static_cast<int>(reason),
|
GGML_ABORT("ggml_webgpu: Device error! Reason: %d, Message: %s\n", static_cast<int>(reason),
|
||||||
std::string(message).c_str());
|
std::string(message).c_str());
|
||||||
});
|
});
|
||||||
|
dev_desc.nextInChain = &deviceTogglesDesc;
|
||||||
ctx->instance.WaitAny(ctx->adapter.RequestDevice(
|
ctx->instance.WaitAny(ctx->adapter.RequestDevice(
|
||||||
&dev_desc, wgpu::CallbackMode::AllowSpontaneous,
|
&dev_desc, wgpu::CallbackMode::AllowSpontaneous,
|
||||||
[ctx](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) {
|
[ctx](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) {
|
||||||
@@ -2243,11 +2260,18 @@ ggml_backend_reg_t ggml_backend_webgpu_reg() {
|
|||||||
ctx.name = GGML_WEBGPU_NAME;
|
ctx.name = GGML_WEBGPU_NAME;
|
||||||
ctx.device_count = 1;
|
ctx.device_count = 1;
|
||||||
|
|
||||||
|
const char * const instanceEnabledToggles[] = { "allow_unsafe_apis" };
|
||||||
|
|
||||||
|
wgpu::DawnTogglesDescriptor instanceTogglesDesc;
|
||||||
|
instanceTogglesDesc.enabledToggles = instanceEnabledToggles;
|
||||||
|
instanceTogglesDesc.enabledToggleCount = 1;
|
||||||
wgpu::InstanceDescriptor instance_descriptor{};
|
wgpu::InstanceDescriptor instance_descriptor{};
|
||||||
std::vector<wgpu::InstanceFeatureName> instance_features = { wgpu::InstanceFeatureName::TimedWaitAny };
|
std::vector<wgpu::InstanceFeatureName> instance_features = { wgpu::InstanceFeatureName::TimedWaitAny };
|
||||||
instance_descriptor.requiredFeatures = instance_features.data();
|
instance_descriptor.requiredFeatures = instance_features.data();
|
||||||
instance_descriptor.requiredFeatureCount = instance_features.size();
|
instance_descriptor.requiredFeatureCount = instance_features.size();
|
||||||
webgpu_ctx->instance = wgpu::CreateInstance(&instance_descriptor);
|
instance_descriptor.nextInChain = &instanceTogglesDesc;
|
||||||
|
|
||||||
|
webgpu_ctx->instance = wgpu::CreateInstance(&instance_descriptor);
|
||||||
GGML_ASSERT(webgpu_ctx->instance != nullptr);
|
GGML_ASSERT(webgpu_ctx->instance != nullptr);
|
||||||
|
|
||||||
static ggml_backend_reg reg = {
|
static ggml_backend_reg reg = {
|
||||||
|
|||||||
Reference in New Issue
Block a user