mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
Add LLaDA-7b-MoE diffusion model (#16003)
This commit is contained in:
@@ -510,19 +510,27 @@ static void diffusion_generate(llama_context * ctx,
|
||||
n_generated = params.max_length;
|
||||
}
|
||||
|
||||
static std::string format_input_text(const std::string & prompt, bool use_chat_template, llama_model * model) {
|
||||
static std::string format_input_text(const std::string & prompt, const std::string & system_prompt, bool use_chat_template, llama_model * model) {
|
||||
if (!use_chat_template) {
|
||||
return prompt;
|
||||
}
|
||||
|
||||
auto chat_templates = common_chat_templates_init(model, "");
|
||||
|
||||
common_chat_templates_inputs inputs;
|
||||
common_chat_msg user_msg;
|
||||
user_msg.role = "user";
|
||||
user_msg.content = prompt;
|
||||
inputs.add_generation_prompt = true;
|
||||
common_chat_msg system_msg;
|
||||
|
||||
if (!system_prompt.empty()) {
|
||||
system_msg.role = "system";
|
||||
system_msg.content = system_prompt;
|
||||
inputs.messages.push_back(system_msg);
|
||||
}
|
||||
|
||||
common_chat_msg user_msg;
|
||||
user_msg.role = "user";
|
||||
user_msg.content = prompt;
|
||||
|
||||
inputs.messages.push_back(user_msg);
|
||||
inputs.add_generation_prompt = true;
|
||||
|
||||
auto result = common_chat_templates_apply(chat_templates.get(), inputs);
|
||||
|
||||
@@ -579,7 +587,8 @@ int main(int argc, char ** argv) {
|
||||
llama_set_n_threads(ctx, params.cpuparams.n_threads, params.cpuparams_batch.n_threads);
|
||||
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
std::string formatted_prompt = format_input_text(params.prompt, params.enable_chat_template, model);
|
||||
|
||||
std::string formatted_prompt = format_input_text(params.prompt, params.system_prompt, params.enable_chat_template, model);
|
||||
|
||||
std::vector<llama_token> input_tokens = common_tokenize(vocab,
|
||||
formatted_prompt,
|
||||
@@ -596,6 +605,7 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
llama_token mask_token_id = llama_vocab_mask(vocab);
|
||||
|
||||
GGML_ASSERT(mask_token_id != LLAMA_TOKEN_NULL);
|
||||
|
||||
bool visual_mode = params.diffusion.visual_mode;
|
||||
|
||||
Reference in New Issue
Block a user