vulkan: Fix test-thread-safety crashes (#17024)

The std::map pipeline_flash_attn_f32_f16 could be searched and inserted into
concurrently from multiple threads, so both operations need to hold the device
mutex. To be safe, hold the lock for all of ggml_vk_load_shaders.
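
At its core the crash is an unsynchronized lookup-or-insert on a shared
std::map: a concurrent find() and operator[] on the same map is a data race,
because the insert can rebalance the tree underneath the reader. A minimal
sketch of the fixed pattern, using illustrative names and an int key rather
than the actual ggml-vulkan types:

#include <map>
#include <memory>
#include <mutex>

struct Pipeline {};

std::map<int, std::shared_ptr<Pipeline>> pipelines;   // shared cache
std::recursive_mutex device_mutex;                    // stands in for device->mutex

// Do the lookup-or-insert as a single critical section, so no thread
// can observe the map mid-insert.
std::shared_ptr<Pipeline> get_pipeline(int key) {
    std::lock_guard<std::recursive_mutex> guard(device_mutex);
    auto it = pipelines.find(key);
    if (it != pipelines.end()) {
        return it->second;
    }
    auto p = std::make_shared<Pipeline>();
    pipelines[key] = p;
    return p;
}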
Author:    Jeff Bolz
Date:      2025-11-08 01:39:45 -06:00
Committer: GitHub
Parent:    e14e842e87
Commit:    d6fe40fa00

@@ -130,9 +130,9 @@ struct vk_pipeline_struct {
     // true if fields have been set by ggml_vk_create_pipeline
     bool initialized {};
     // set to true to request the pipeline is compiled
-    bool needed {};
+    std::atomic<bool> needed {};
     // set to true when the shader has been compiled
-    bool compiled {};
+    std::atomic<bool> compiled {};
     // number of registers used, extracted from pipeline executable properties
     uint32_t register_count {};
 };
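
A note on the std::atomic<bool> change above: these flags are presumably
checked in paths that can run concurrently with the worker threads that set
them, and a plain bool read racing with a write is undefined behavior;
std::atomic<bool> makes the flags themselves safe to poll without taking the
mutex. A minimal sketch of the pattern, with hypothetical names:

#include <atomic>

struct PipelineFlags {
    std::atomic<bool> needed {};    // set to request compilation
    std::atomic<bool> compiled {};  // set once the shader is built
};

PipelineFlags flags;

// Worker thread: publish completion.
void on_compile_done() {
    flags.compiled = true;   // atomic store (seq_cst by default)
}

// Any thread: cheap check, no mutex needed for the flags themselves.
bool needs_compile() {
    return flags.needed && !flags.compiled;   // atomic loads
}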
@@ -1842,10 +1842,7 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
         }
     }
-    {
-        std::lock_guard<std::recursive_mutex> guard(device->mutex);
-        device->all_pipelines.push_back(pipeline);
-    }
+    device->all_pipelines.push_back(pipeline);
 
     {
         std::lock_guard<std::mutex> guard(compile_count_mutex);
@@ -2536,6 +2533,7 @@ static uint32_t get_subgroup_size(const std::string &pipeline_name, const vk_dev
 static void ggml_vk_load_shaders(vk_device& device) {
     VK_LOG_DEBUG("ggml_vk_load_shaders(" << device->name << ")");
+    std::lock_guard<std::recursive_mutex> guard(device->mutex);
 
     // some shaders have a minimum subgroup size
     const uint32_t subgroup_size_8 = std::max(device->subgroup_size, 8u);
     const uint32_t subgroup_size_16 = std::max(device->subgroup_size, 16u);
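
Because device->mutex is a std::recursive_mutex, ggml_vk_load_shaders can hold
it for the whole function even when callees lock it again, which is presumably
also why the now-redundant inner lock in ggml_vk_create_pipeline_func is
dropped in the earlier hunk. A minimal sketch of the re-entrancy that makes
this safe:

#include <mutex>

std::recursive_mutex m;

void callee() {
    // OK: the owning thread may lock a recursive_mutex again;
    // each acquisition is paired with a release by lock_guard.
    std::lock_guard<std::recursive_mutex> guard(m);
}

void caller() {
    std::lock_guard<std::recursive_mutex> guard(m);  // first acquisition
    callee();                                        // nested acquisition, no deadlock
}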
@@ -2729,6 +2727,8 @@ static void ggml_vk_load_shaders(vk_device& device) {
     if (!pipeline->needed || pipeline->compiled) {
         return;
     }
+    // TODO: We're no longer benefitting from the async compiles (shaders are
+    // compiled individually, as needed) and this complexity can be removed.
     {
         // wait until fewer than N compiles are in progress
         uint32_t N = std::max(1u, std::thread::hardware_concurrency());
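
The "fewer than N compiles" wait above is a counting throttle built from a
mutex and a condition variable. A sketch of that shape, assuming helper names
(begin_compile/end_compile, compile_count_cond) that are not necessarily those
of the real file; compile_count_mutex does appear in the hunk above:

#include <algorithm>
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <thread>

std::mutex compile_count_mutex;
std::condition_variable compile_count_cond;
uint32_t compile_count = 0;

void begin_compile() {
    const uint32_t N = std::max(1u, std::thread::hardware_concurrency());
    std::unique_lock<std::mutex> lock(compile_count_mutex);
    // block until fewer than N compiles are in progress
    compile_count_cond.wait(lock, [&] { return compile_count < N; });
    ++compile_count;
}

void end_compile() {
    {
        std::lock_guard<std::mutex> lock(compile_count_mutex);
        --compile_count;
    }
    compile_count_cond.notify_one();
}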
@@ -7914,12 +7914,15 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
     vk_pipeline pipeline = nullptr;
-    auto &pipelines = ctx->device->pipeline_flash_attn_f32_f16[k->type];
-    auto it = pipelines.find(fa_pipeline_state);
-    if (it != pipelines.end()) {
-        pipeline = it->second;
-    } else {
-        pipelines[fa_pipeline_state] = pipeline = std::make_shared<vk_pipeline_struct>();
-    }
+    {
+        std::lock_guard<std::recursive_mutex> guard(ctx->device->mutex);
+        auto &pipelines = ctx->device->pipeline_flash_attn_f32_f16[k->type];
+        auto it = pipelines.find(fa_pipeline_state);
+        if (it != pipelines.end()) {
+            pipeline = it->second;
+        } else {
+            pipelines[fa_pipeline_state] = pipeline = std::make_shared<vk_pipeline_struct>();
+        }
+    }
     assert(pipeline);