Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-04 09:32:00 +00:00)
	llama/ggml: add LLM training support (#10544)
* llama/ggml: add LLM training support
    more compact progress bar
    llama_save_model_to_file
    llama_opt_param_filter
    ggml_graph_dup force_grads
    refactor ggml_opt, fix test-opt
* remove logits_all
* refactor CUDA implementation for ACC
* reset graph at beginning of opt period
@@ -1,5 +1,7 @@
 #include "llama-vocab.h"
 
+#include "ggml.h"
+#include "gguf.h"
 #include "llama-impl.h"
 #include "llama-model-loader.h"
 
@@ -1234,6 +1236,9 @@ struct fragment_buffer_variant {
 struct llama_vocab::impl {
     uint32_t n_token_types = 0; // for BERT-style token types
 
+    std::string tokenizer_model;
+    std::string tokenizer_pre;
+
     enum llama_vocab_type     type     = LLAMA_VOCAB_TYPE_SPM;
     enum llama_vocab_pre_type pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
 
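The two new members cache the raw tokenizer metadata strings that the GGUF loader reads, rather than discarding them once the vocab type has been parsed; the getters added further down expose them again so a trained model can be written back out with the same metadata. A minimal sketch of that caching pattern, with hypothetical names (vocab_impl, load_metadata) that are not part of llama.cpp:

    #include <string>

    // Hypothetical, stripped-down analogue of llama_vocab::impl: the raw strings
    // from the GGUF metadata ("tokenizer.ggml.model" / "tokenizer.ggml.pre") are
    // kept alongside the parsed enums so they can be re-serialized later.
    struct vocab_impl {
        std::string tokenizer_model;
        std::string tokenizer_pre;

        void load_metadata(const std::string & model_kv, const std::string & pre_kv) {
            tokenizer_model = model_kv; // keep the original string, not only the parsed enum
            tokenizer_pre   = pre_kv;
        }
    };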
@@ -1369,9 +1374,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
     // determine vocab type
     {
-        std::string tokenizer_model;
-        std::string tokenizer_pre;
-
         ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_model);
         ml.get_key(LLM_KV_TOKENIZER_PRE,   tokenizer_pre, false);
 
@@ -1466,7 +1468,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
             const int precompiled_charsmap_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str());
             if (precompiled_charsmap_keyidx != -1) {
-                size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
+                const gguf_type pc_type = gguf_get_arr_type(ctx, precompiled_charsmap_keyidx);
+                GGML_ASSERT(pc_type == GGUF_TYPE_INT8 || pc_type == GGUF_TYPE_UINT8);
+
+                const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
                 const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
                 precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
 #ifdef IS_BIG_ENDIAN
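The replacement lines validate the GGUF array element type before the bytes are copied. The same read-with-type-check pattern written as a free-standing helper, using only the gguf_* calls that appear above; the helper itself (read_byte_array) is illustrative and not part of the patch:

    #include <vector>
    #include "ggml.h"  // GGML_ASSERT
    #include "gguf.h"  // gguf_find_key, gguf_get_arr_*

    // Illustrative helper: read an INT8/UINT8 array KV pair into a byte vector,
    // asserting the element type the way the diff does for the charsmap.
    static std::vector<char> read_byte_array(const struct gguf_context * ctx, const char * key) {
        std::vector<char> out;

        const int keyidx = gguf_find_key(ctx, key);
        if (keyidx != -1) {
            const gguf_type type = gguf_get_arr_type(ctx, keyidx);
            GGML_ASSERT(type == GGUF_TYPE_INT8 || type == GGUF_TYPE_UINT8);

            const size_t n = gguf_get_arr_n(ctx, keyidx);
            const char * data = (const char *) gguf_get_arr_data(ctx, keyidx);
            out.assign(data, data + n);
        }
        return out;
    }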
@@ -2789,6 +2794,14 @@ void llama_vocab::load(llama_model_loader & ml, const LLM_KV & kv) {
     pimpl->load(ml, kv);
 }
 
+std::string llama_vocab::get_tokenizer_model() const {
+    return pimpl->tokenizer_model;
+}
+
+std::string llama_vocab::get_tokenizer_pre() const {
+    return pimpl->tokenizer_pre;
+}
+
 enum llama_vocab_type llama_vocab::get_type() const {
     return pimpl->type;
 }
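These new public getters simply expose the cached impl fields. A sketch of a hypothetical consumer, e.g. a model-export path that copies the tokenizer metadata back out; only the llama_vocab accessors shown above come from the patch, the surrounding function and the commented-out writer call are made up:

    #include <string>

    // Illustrative only: pull tokenizer metadata from a loaded vocab so it can
    // be written into the KV section of an output GGUF file.
    static void export_tokenizer_metadata(const llama_vocab & vocab) {
        const std::string model = vocab.get_tokenizer_model(); // e.g. "gpt2", "llama"
        const std::string pre   = vocab.get_tokenizer_pre();   // pre-tokenizer variant
        const llama_vocab_type type = vocab.get_type();        // SPM, BPE, WPM, ...

        // gguf_set_val_str(out, "tokenizer.ggml.model", model.c_str()); // hypothetical use
        (void) model; (void) pre; (void) type;
    }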
@@ -3011,6 +3024,20 @@ int llama_vocab::find_bpe_rank(const std::string & token_left, const std::string
     return it->second;
 }
 
+std::vector<std::string> llama_vocab::get_bpe_merges() const {
+    std::vector<std::string> result(pimpl->bpe_ranks.size());
+
+    for (const auto & pair : pimpl->bpe_ranks) {
+        result[pair.second] = pair.first.first + " " + pair.first.second;
+    }
+
+    return result;
+}
+
+std::vector<char> llama_vocab::get_precompiled_charsmap() const {
+    return pimpl->precompiled_charsmap;
+}
+
 int32_t llama_vocab::tokenize(
                   const char * text,
                      int32_t   text_len,
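get_bpe_merges() inverts the rank map: bpe_ranks maps a (left, right) token pair to its merge rank, and the getter rebuilds the rank-ordered list of "left right" merge strings as they are stored in GGUF. A self-contained sketch of that inversion with standard containers (the map here is a simplified stand-in for pimpl->bpe_ranks):

    #include <map>
    #include <string>
    #include <utility>
    #include <vector>
    #include <cstdio>

    int main() {
        // Simplified stand-in for pimpl->bpe_ranks: (left, right) pair -> merge rank.
        std::map<std::pair<std::string, std::string>, int> bpe_ranks = {
            {{"h", "e"},  0},
            {{"he", "l"}, 1},
            {{"l", "o"},  2},
        };

        // Same inversion as llama_vocab::get_bpe_merges(): index by rank so the
        // merges come out in the order a BPE tokenizer must apply them.
        std::vector<std::string> merges(bpe_ranks.size());
        for (const auto & pair : bpe_ranks) {
            merges[pair.second] = pair.first.first + " " + pair.first.second;
        }

        for (const auto & m : merges) {
            std::printf("%s\n", m.c_str()); // prints "h e", "he l", "l o"
        }
        return 0;
    }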
 