	Merge branch 'master' into xsn/private_batch_api
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -10,7 +10,6 @@
 // Change JSON_ASSERT from assert() to GGML_ASSERT:
 #define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
-#include "json-schema-to-grammar.h"
 #include "llama.h"
 
 #include <algorithm>
@@ -483,6 +482,11 @@ void string_replace_all(std::string & s, const std::string & search, const std::
     s = std::move(builder);
 }
 
+std::string regex_escape(const std::string & s) {
+    static const std::regex special_chars("[.^$|()*+?\\[\\]{}\\\\]");
+    return std::regex_replace(s, special_chars, "\\$0");
+}
+
 std::string string_join(const std::vector<std::string> & values, const std::string & separator) {
     std::ostringstream result;
     for (size_t i = 0; i < values.size(); ++i) {
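The new regex_escape helper backslash-escapes every regex metacharacter so that arbitrary text can be embedded literally in a pattern. A minimal standalone sketch of the same logic (the main harness is illustrative only; $0 in the replacement string expands to the whole match on common standard-library implementations):

    #include <iostream>
    #include <regex>
    #include <string>

    // Same body as the regex_escape() added above: prefix each regex
    // metacharacter with a backslash so it is matched literally.
    static std::string regex_escape(const std::string & s) {
        static const std::regex special_chars("[.^$|()*+?\\[\\]{}\\\\]");
        return std::regex_replace(s, special_chars, "\\$0");
    }

    int main() {
        std::cout << regex_escape("1+1=2?") << "\n"; // prints: 1\+1=2\?
    }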
@@ -953,8 +957,8 @@ struct common_init_result common_init_from_params(common_params & params) {
         return iparams;
     }
 
-    if (params.ctx_shift && !llama_kv_cache_can_shift(lctx)) {
-        LOG_WRN("%s: KV cache shifting is not supported for this model, disabling KV cache shifting\n", __func__);
+    if (params.ctx_shift && !llama_kv_self_can_shift(lctx)) {
+        LOG_WRN("%s: KV cache shifting is not supported for this context, disabling KV cache shifting\n", __func__);
         params.ctx_shift = false;
     }
 
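The switch from llama_kv_cache_can_shift to llama_kv_self_can_shift picks up the master-branch rename of the llama_kv_cache_* functions to llama_kv_self_*, which operate on the context's own KV cache. A minimal sketch of the same guard in caller code (the helper name is hypothetical):

    // Hypothetical helper: keep context shifting enabled only when the
    // context's KV cache supports shifting, mirroring the guard above.
    static bool ctx_shift_supported(llama_context * lctx, bool requested) {
        return requested && llama_kv_self_can_shift(lctx);
    }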
@@ -1060,7 +1064,7 @@ struct common_init_result common_init_from_params(common_params & params) {
             llama_batch_ext_ptr batch(llama_batch_ext_init_from_text(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0));
             llama_decode_ext(lctx, batch.get());
         }
-        llama_kv_cache_clear(lctx);
+        llama_kv_self_clear(lctx);
         llama_synchronize(lctx);
         llama_perf_context_reset(lctx);
     }
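The warmup path gets the same rename, llama_kv_cache_clear becoming llama_kv_self_clear. A hedged sketch of the merged sequence, with the llama_batch_ext_* calls taken verbatim from this branch (not a released API) and the vocab handle assumed to be in scope:

    // Decode one dummy token to warm up the backend, then reset all state.
    std::vector<llama_token> tmp = { llama_vocab_bos(vocab) };  // assumed warmup token
    llama_batch_ext_ptr batch(llama_batch_ext_init_from_text(tmp.data(), tmp.size(), 0, 0));
    llama_decode_ext(lctx, batch.get());  // run the dummy decode
    llama_kv_self_clear(lctx);            // drop the warmup tokens from the KV cache
    llama_synchronize(lctx);              // wait for the backend to finish
    llama_perf_context_reset(lctx);       // exclude warmup from perf counters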
@@ -2032,3 +2036,25 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
     return result;
 }
 
+template <>
+json common_grammar_trigger::to_json() const {
+    json out {
+        {"type", (int) type},
+        {"value", value},
+    };
+    if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
+        out["token"] = (int) token;
+    }
+    return out;
+}
+
+template <>
+common_grammar_trigger common_grammar_trigger::from_json(const json & in) {
+    common_grammar_trigger out;
+    out.type = (common_grammar_trigger_type) in.at("type").get<int>();
+    out.value = in.at("value").get<std::string>();
+    if (out.type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
+        out.token = (llama_token) in.at("token").get<int>();
+    }
+    return out;
+}
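A round-trip sketch for the new serializers, assuming the primary templates in common.h are `template <class T> T to_json() const` and `template <class T> static common_grammar_trigger from_json(const T &)`, as the specializations above suggest:

    // Serialize a token trigger to JSON and read it back (values are
    // illustrative; COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN comes from common.h).
    common_grammar_trigger trig;
    trig.type  = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
    trig.value = "</tool_call>";
    trig.token = 42;  // hypothetical token id

    json j = trig.to_json<json>();  // {"type": ..., "value": "</tool_call>", "token": 42}
    common_grammar_trigger back = common_grammar_trigger::from_json<json>(j);
    // back now holds the same type/value/token as trig.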