Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-29 08:41:22 +00:00)

Commit 4760e7cc0b
* sync : ggml (backend v2) (wip)
* sync : migrate examples and llama.cpp to dynamic graphs (wip)
* sync : update tests + fix max op params to 64 ggml-ci
* sync : ggml-cuda ggml-ci
* llama : fix save/load state context size ggml-ci
* sync : try to fix build on tvOS
* sync : pass custom graph sizes in training examples
* sync : update graph copies to new ggml API
* sync : update sync-ggml.sh with new files
* scripts : fix header in sync script
* train : fix context size calculations
* llama : increase inference graph size up to 4096 nodes
* train : allocate grads for backward graphs
* train : allocate grads for gb_tmp
234 lines · 7.7 KiB · C++
// Various helper functions and utilities for training

#pragma once

#include <string>
#include <random>
#include <vector>

#include "ggml.h"
#include "llama.h"

#define LLAMA_TRAIN_MAX_NODES 16384

typedef std::string mt19937_state;
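
// Note (descriptive comment, not in the upstream header): mt19937_state holds the
// serialized text representation of a std::mt19937 engine, produced by
// mt19937_get_state() and restored by mt19937_set_state() below, so the RNG state
// can be written to and read back from training checkpoints.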

struct train_state {
    struct ggml_opt_context * opt;

    uint64_t train_its;
    uint64_t train_samples;
    uint64_t train_tokens;
    uint64_t train_epochs;

    size_t        shuffle_samples_hash; // fn, sample_count, *zip(sample_begins, sample_sizes)
    mt19937_state shuffle_rng_state_current;
    mt19937_state shuffle_rng_state_next;
    size_t        shuffle_sample_count;
    size_t        shuffle_next_sample;
};
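
// Note (descriptive comment, not in the upstream header): train_state carries what
// is needed to resume training deterministically from a checkpoint: the ggml
// optimizer context, cumulative iteration/sample/token/epoch counters, and the
// sample-shuffling bookkeeping. shuffle_samples_hash appears to be used to detect
// whether the training data or its sample layout changed between checkpoint save
// and load (see compute_samples_hash() below).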

struct train_params_common {
    const char * fn_train_data;
    const char * fn_checkpoint_in;
    const char * fn_checkpoint_out;
    const char * pattern_fn_it;
    const char * fn_latest;

    bool print_usage;

    int save_every;

    uint32_t seed;

    int n_ctx;
    int n_threads;
    int n_batch;
    int n_gradient_accumulation;
    int n_epochs;
    int n_gpu_layers;

    bool custom_n_ctx;

    bool use_flash;
    bool use_checkpointing;

    std::string sample_start;
    bool include_sample_start;
    bool escape;
    bool overlapping_samples;
    bool fill_with_next_samples;
    bool separate_with_eos;
    bool separate_with_bos;
    bool sample_random_offsets;

    bool force_reshuffle;

    int   warmup;
    int   cos_decay_steps;
    float cos_decay_restart;
    float cos_decay_min;
    bool  enable_restart;

    int   opt_past;
    float opt_delta;
    int   opt_max_no_improvement;

    int   adam_n_iter;
    float adam_alpha;
    float adam_min_alpha;
    float adam_decay;
    int   adam_decay_min_ndim;
    float adam_beta1;
    float adam_beta2;
    float adam_gclip;
    float adam_eps_f;
};

typedef void (*save_train_files_callback)(void * data, struct train_state * train);

struct train_opt_callback_data {
    struct train_params_common * params;
    struct train_state         * train;
    save_train_files_callback    save_cb;
    void                       * save_data;
    struct llama_context       * lctx;
    int                          last_save_iter;
    llama_token                * tokens_data;
    size_t                       tokens_size;
    size_t                     * samples_begin;
    size_t                     * samples_size;
    size_t                     * shuffled_samples_offs;
    size_t                     * shuffled_samples_begin;
    size_t                     * shuffled_samples_size;
    size_t                       samples_count;
    struct ggml_tensor         * tokens_input;
    struct ggml_tensor         * target_probs;
    int                          first_iter;
    int                          first_epoch;
    int                          iter_at_last_epoch;
    int64_t                      last_time;
    double                       millis_per_iter;
};

struct train_state * init_train_state();
void free_train_state(struct train_state  * state);

struct train_params_common get_default_train_params_common();
void print_common_train_usage(int /*argc*/, char ** argv, const struct train_params_common * params);

bool consume_common_train_arg(int argc, char ** argv, int * idx, struct train_params_common * params, bool * invalid_param);
void finish_processing_train_args(struct train_params_common * params);
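
// A minimal usage sketch (hypothetical, not part of this header) for the argument
// helpers above, assuming consume_common_train_arg() returns true when it
// recognized and consumed the argument at *idx:
//
//     struct train_params_common params = get_default_train_params_common();
//     bool invalid_param = false;
//     for (int i = 1; i < argc; ++i) {
//         if (consume_common_train_arg(argc, argv, &i, &params, &invalid_param)) {
//             if (invalid_param) {
//                 print_common_train_usage(argc, argv, &params);
//                 break;
//             }
//         }
//         // else: handle example-specific arguments here
//     }
//     finish_processing_train_args(&params);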

struct random_normal_distribution;
struct random_uniform_distribution;

struct random_normal_distribution  * init_random_normal_distribution (int seed, float mean, float std, float min, float max);
struct random_uniform_distribution * init_random_uniform_distribution(int seed, float min, float max);

void free_random_normal_distribution (struct random_normal_distribution  * rnd);
void free_random_uniform_distribution(struct random_uniform_distribution * rnd);

struct ggml_tensor * randomize_tensor_normal (struct ggml_tensor * tensor, struct random_normal_distribution * rnd);
struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struct random_uniform_distribution * rnd);
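
// Note (descriptive comment, not in the upstream header): the randomize_tensor_*
// helpers fill the given tensor in place with values drawn from the supplied
// distribution and, judging by the signatures, return the same tensor pointer for
// convenience. They are typically used to initialize weights before training.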

// generate random float in interval [0,1)
float frand();
float frand_normal (struct random_normal_distribution * rnd);
float frand_uniform(struct random_uniform_distribution * rnd);

int   clamp (const int v, const int min, const int max);
float fclamp(const float v, const float min, const float max);

void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0);
void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1);
void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2);
void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3);

size_t tokenize_file(
        struct llama_context     * lctx,
        const char               * filename,
        const std::string        & sample_start,
        bool                       include_sample_start,
        bool                       overlapping_samples,
        unsigned                   context_length,
        std::vector<llama_token> & out_tokens,
        std::vector<size_t>      & out_samples_begin,
        std::vector<size_t>      & out_samples_size);
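
// Note (descriptive comment, not in the upstream header): tokenize_file reads and
// tokenizes `filename` using the model attached to `lctx`, splits the token stream
// into training samples (optionally delimited by `sample_start` and optionally
// overlapping), and fills the three output vectors. The returned size_t is
// presumably the total number of tokens produced.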

int64_t get_example_targets_batch(
        struct llama_context * lctx,
        struct ggml_tensor   * tokens_input,
        struct ggml_tensor   * target_probs,
        int64_t                example_id,
        const size_t         * samples_offs,
        const size_t         * samples_begin,
        const size_t         * samples_size,
              size_t           samples_count,
        const llama_token    * train_data,
        size_t                 n_train_data,
        bool                   separate_with_eos,
        bool                   separate_with_bos,
        bool                   fill_with_next_samples,
        bool                   sample_random_offsets);


void          mt19937_set_state(std::mt19937& rng, const mt19937_state& rng_state);
mt19937_state mt19937_get_state(const std::mt19937& rng);
mt19937_state mt19937_seed_to_state(unsigned seed);

mt19937_state shuffle_samples(
        const mt19937_state & rng_state,
        size_t              * shuffled_offs,
        size_t              * shuffled_begins,
        size_t              * shuffled_sizes,
        const size_t        * begins,
        const size_t        * sizes,
        size_t                count);
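
// Note (descriptive comment, not in the upstream header): shuffle_samples performs
// a deterministic shuffle of the sample offsets/begins/sizes using the RNG state
// passed in and returns the advanced RNG state. Because the state is a
// serializable string (see mt19937_state above), the same shuffle order can be
// reproduced after reloading a checkpoint.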

size_t hash_combine(size_t h1, size_t h2);

size_t compute_samples_hash(
    const char* fn,
    const size_t* samples_begin,
    const size_t* samples_size,
    size_t sample_count);


std::string replace_str(const char * s, const char * needle, const char * replacement);

void print_duration(double milliseconds);

float cosine_decay(
    int64_t step,
    int64_t decay_steps,
    float   minimum);

float cosine_decay_restart(
    int64_t step,
    int64_t decay_steps,
    float   minimum,
    float   restart_step_mult);

float learning_schedule(
    int64_t step,
    int64_t warmup_steps,
    int64_t decay_steps,
    float   learning_rate,
    float   overall_minimum,
    float   cos_decay_minimum,
    float   cos_decay_restart_step_mult,
    bool    enable_restart);
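
// Note (descriptive comment, not in the upstream header): learning_schedule
// appears to combine the two helpers above: a linear warmup over `warmup_steps`,
// followed by cosine decay (cosine_decay_restart() when enable_restart is set,
// cosine_decay() otherwise), bounded below by `overall_minimum`.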

void copy_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name);

void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct ggml_opt_context * opt);
void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context * opt);

bool load_train_state_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct train_state * train);
void save_train_state_gguf(struct gguf_context * fctx, struct train_state * train);

std::string get_train_filename(const char * filename, const char * pattern_it, const char * latest, int64_t iteration);

void train_opt_callback(void * vdata, int accum_step, float * sched, bool * cancel);
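
// Note (descriptive comment, not in the upstream header): train_opt_callback is
// intended to be registered as the ggml optimizer callback, with a
// train_opt_callback_data instance passed through `vdata`. From its signature and
// the fields above, it is expected to fill tokens_input/target_probs with the next
// batch, update the learning-rate schedule via *sched, periodically save
// checkpoints through save_cb, and request early termination by setting *cancel.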