llama : improve infill support and special token detection (#9798)

* llama : improve infill support ggml-ci * llama : add more FIM token strings ggml-ci * server : update prompt on slot restore (#9800) * gguf : deprecate old FIM token KVs
2025-10-27 08:21:30 +00:00 · 2024-10-12 08:21:51 +03:00
parent 943d20b411
commit 11ac9800af
12 changed files with 601 additions and 427 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -352,15 +352,28 @@ void common_init();

 std::string common_params_get_system_info(const common_params & params);

-bool parse_cpu_range(const std::string& range, bool(&boolmask)[GGML_MAX_N_THREADS]);
-bool parse_cpu_mask(const std::string& mask, bool(&boolmask)[GGML_MAX_N_THREADS]);
-void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model = nullptr);
+bool parse_cpu_range(const std::string & range, bool(&boolmask)[GGML_MAX_N_THREADS]);
+bool parse_cpu_mask(const std::string & mask, bool(&boolmask)[GGML_MAX_N_THREADS]);
+void postprocess_cpu_params(cpu_params & cpuparams, const cpu_params * role_model = nullptr);
 bool set_process_priority(enum ggml_sched_priority prio);

 //
 // String utils
 //

+#ifdef __GNUC__
+#ifdef __MINGW32__
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#else
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#endif
+#else
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
+#endif
+
+LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
+std::string string_format(const char * fmt, ...);
+
 std::vector<std::string> string_split(std::string input, char separator);

 std::string string_strip(const std::string & str);