mirror of https://github.com/ggml-org/llama.cpp.git

cleanup and stuff
examples/common.cpp
@@ -362,12 +362,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.steering_mul = std::stof(argv[i]);
-        } else if (arg == "--steering-lyr") {
+        } else if (arg == "--steering-layer") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
-            params.steering_lyr = std::stoi(argv[i]);
+            params.steering_layer = std::stoi(argv[i]);
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             gpt_print_usage(argc, argv, default_params);
@@ -454,6 +454,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     }
     fprintf(stderr, "  -ngl N, --n-gpu-layers N\n");
     fprintf(stderr, "                        number of layers to store in VRAM\n");
+    fprintf(stderr, "  --steering-add        add positive steering prompt\n");
+    fprintf(stderr, "  --steering-sub        add negative steering prompt\n");
+    fprintf(stderr, "  --steering-mul        set steering strength (negative is reverse, default %.1f)\n", params.steering_mul);
+    fprintf(stderr, "  --steering-layer      set layer for steering (default %d)\n", params.steering_layer);
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
     fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
     fprintf(stderr, "  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");
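
Taken together, these four flags drive steering from the command line. A hypothetical invocation (the model path, prompt, and steering texts are illustrative placeholders, not part of this commit):

    ./main -m models/7B/ggml-model.bin -p "I went up to my friend and said" \
        --steering-add "Love" --steering-sub "Hate" --steering-mul 5.0 --steering-layer 15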

examples/common.h
@@ -73,10 +73,10 @@ struct gpt_params {
     bool mem_test          = false; // compute maximum memory usage
     bool verbose_prompt    = false; // print prompt tokens before generation
 
-    std::string steering_add = "";
-    std::string steering_sub = "";
+    std::string steering_add;
+    std::string steering_sub;
     float       steering_mul = 1.0f;
-    int         steering_lyr = 20;
+    int         steering_layer = 15;
 };
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);

examples/main/main.cpp
@@ -136,28 +136,6 @@ int main(int argc, char ** argv) {
         return 0;
     }
 
-    if (params.steering_add.size() || params.steering_sub.size())
-    {
-        auto steering_add_tokens = ::llama_tokenize(ctx, params.steering_add, true);
-        auto steering_sub_tokens = ::llama_tokenize(ctx, params.steering_sub, true);
-
-        if (steering_add_tokens.size() != steering_sub_tokens.size()) {
-            llama_token space;
-            llama_tokenize(ctx, " ", &space, 1, 0);
-
-            while (steering_add_tokens.size() < steering_sub_tokens.size()) steering_add_tokens.push_back(space);
-            while (steering_sub_tokens.size() < steering_add_tokens.size()) steering_sub_tokens.push_back(space);
-        }
-
-        llama_set_steering_write(ctx, params.steering_lyr, params.steering_mul/2);
-        llama_eval(ctx, steering_add_tokens.data(), std::min((int)steering_add_tokens.size(), params.n_ctx), 0, params.n_threads);
-
-        llama_set_steering_write(ctx, params.steering_lyr, -params.steering_mul/2);
-        llama_eval(ctx, steering_sub_tokens.data(), std::min((int)steering_sub_tokens.size(), params.n_ctx), 0, params.n_threads);
-
-        llama_set_steering_read(ctx, params.steering_lyr, 1);
-    }
-
     // Add a space in front of the first character to match OG llama tokenizer behavior
     params.prompt.insert(0, 1, ' ');
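
Before-and-after semantics: the removed block ran the steering passes before the prompt was even prepared, wrote them with weights +/- steering_mul/2, and read the vector back with weight 1, so the intended offset was (mul/2) * (h_add - h_sub). The replacement below runs after context and session setup, prepends the tokenizer's leading space to the steering prompts itself, writes with +/- 1, and applies steering_mul only at read time, giving mul * (h_add - h_sub). Deferring the scale to read time means the strength can change without recomputing the vector, though the same numeric --steering-mul now steers twice as hard as before.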

@@ -196,6 +174,32 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    if (!params.steering_add.empty() || !params.steering_sub.empty())
+    {
+        params.steering_add.insert(0, 1, ' ');
+        params.steering_sub.insert(0, 1, ' ');
+
+        auto add_tokens = ::llama_tokenize(ctx, params.steering_add, true);
+        auto sub_tokens = ::llama_tokenize(ctx, params.steering_sub, true);
+
+        //if (add_tokens.size() != sub_tokens.size()) {
+        //    while (add_tokens.size() < sub_tokens.size()) {
+        //        add_tokens.push_back(llama_token_nl());
+        //    }
+        //    while (sub_tokens.size() < add_tokens.size()) {
+        //        sub_tokens.push_back(llama_token_nl());
+        //    }
+        //}
+        //const int N = embd_inp.size();
+        llama_set_steering_write(ctx, params.steering_layer, +1.0f);
+        llama_eval(ctx, add_tokens.data(), std::min((int)add_tokens.size(), n_ctx), 0, params.n_threads);
+
+        llama_set_steering_write(ctx, params.steering_layer, -1.0f);
+        llama_eval(ctx, sub_tokens.data(), std::min((int)sub_tokens.size(), n_ctx), 0, params.n_threads);
+
+        llama_set_steering_read(ctx, params.steering_layer, params.steering_mul);
+    }
+
     // debug message about similarity of saved session, if applicable
     size_t n_matching_session_tokens = 0;
     if (session_tokens.size()) {
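
The write/write/read sequence amounts to plain activation addition. A minimal sketch of the arithmetic on flat buffers (std::vector standing in for the ggml tensors used by the real code; names and layout here are illustrative, not from the commit):

    #include <cstddef>
    #include <vector>

    // Capture phase: the +1 pass accumulates the add-prompt activations and
    // the -1 pass subtracts the sub-prompt activations, leaving
    // v = h_add - h_sub per position at the steering layer.
    std::vector<float> build_steering_vector(const std::vector<float> & h_add,
                                             const std::vector<float> & h_sub) {
        std::vector<float> v(h_add.size());
        for (size_t i = 0; i < v.size() && i < h_sub.size(); ++i) {
            v[i] = h_add[i] - h_sub[i];
        }
        return v;
    }

    // Read phase: every later forward pass adds mul * v to the activations
    // entering the steering layer.
    void apply_steering(std::vector<float> & h, const std::vector<float> & v, float mul) {
        const size_t n = h.size() < v.size() ? h.size() : v.size();
        for (size_t i = 0; i < n; ++i) {
            h[i] += mul * v[i];
        }
    }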
							
								
								
									
llama.cpp
@@ -287,6 +287,9 @@ void llama_set_steering_read(struct llama_context * ctx, int layer, float mul) {
     ctx->steering_mode = STEERING_READ;
     ctx->steering_mul = mul;
     ctx->steering_layer = layer;
+    //FILE* steeringbin = fopen("steering.bin", "wb");
+    //fwrite(ctx->steering_vector.data(), sizeof(float), ctx->steering_vector.size(), steeringbin);
+    //fclose(steeringbin);
 }
 
 template <typename T>
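
For orientation, the steering state these setters mutate looks roughly like the following. This is reconstructed from the field accesses visible in this diff; the actual declarations inside llama_context are not part of this commit, and names beyond those accesses are illustrative:

    #include <vector>

    // Assumed shape of the steering state carried by llama_context,
    // inferred from usage in this commit.
    enum steering_mode_t { STEERING_OFF, STEERING_WRITE, STEERING_READ };

    struct steering_state {
        steering_mode_t    steering_mode  = STEERING_OFF;
        int                steering_layer = 0;
        float              steering_mul   = 0.0f;
        std::vector<float> steering_vector; // n_ctx * n_embd floats, one embedding per context position
    };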
@@ -1163,8 +1166,9 @@ static bool llama_eval_internal(
 
     struct ggml_tensor * steer;
     if (lctx.steering_mode != STEERING_OFF) {
-        steer = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_ctx, n_embd);
-        memcpy(steer->data, lctx.steering_vector.data(), ggml_nbytes(steer));
+        steer = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
+        //steer->data = lctx.steering_vector.data() + n_past * n_embd * sizeof(float);
+        memcpy(steer->data, lctx.steering_vector.data() + n_past * n_embd * sizeof(float), ggml_nbytes(steer));
     }
 
     struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
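
One detail worth double-checking in the new memcpy: if steering_vector is a std::vector<float>, then steering_vector.data() is a float *, and adding n_past * n_embd * sizeof(float) advances four times as far as intended, since pointer arithmetic already counts in elements. Assuming a float element type, the source offset would instead be:

    // hypothetical element-offset variant, assuming steering_vector holds float
    memcpy(steer->data,
           lctx.steering_vector.data() + (size_t) n_past * n_embd,
           ggml_nbytes(steer));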
@@ -1177,15 +1181,14 @@ static bool llama_eval_internal(
         lctx.use_buf(ctx0, 0);
 
         if (lctx.steering_mode != STEERING_OFF && il == lctx.steering_layer) {
-            steer->data = lctx.steering_vector.data();
-
-            struct ggml_tensor * src = ggml_scale(ctx0, inpL, ggml_new_f32(ctx0, lctx.steering_mul));
-            struct ggml_tensor * dst = ggml_view_2d(ctx0, steer, n_embd, N, n_embd * sizeof(float), n_past * n_embd * sizeof(float));
+            struct ggml_tensor * scal = ggml_new_f32(ctx0, lctx.steering_mul);
             if (lctx.steering_mode == STEERING_WRITE) {
-                ggml_build_forward_expand(&gf, ggml_cpy(ctx0, ggml_add(ctx0, src, dst), dst));
-            } else {
-                inpL = src;
+                ggml_build_forward_expand(&gf, ggml_cpy(ctx0,
+                    ggml_add(ctx0, ggml_scale(ctx0, inpL, scal), steer), steer));
+                break;
             }
+
+            inpL = ggml_add(ctx0, ggml_scale(ctx0, steer, scal), inpL);
         }
 
         // norm
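
Two fixes land in this hunk. First, the old read path assigned inpL = ggml_scale(ctx0, inpL, ...) without ever consulting steer, so the stored vector never reached generation; the rewrite actually mixes it in as inpL <- mul * steer + inpL. Second, the branches now reflect ggml's deferred execution: nothing runs until the graph is evaluated, so the write path has to be forced into the graph with ggml_build_forward_expand, where the ggml_cpy materializes steer <- mul * inpL + steer, and the break stops building layers above the steering layer, which a capture-only pass does not need.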
@@ -1403,7 +1406,7 @@ static bool llama_eval_internal(
 
 
     if (lctx.steering_mode == STEERING_WRITE) {
-        memcpy(lctx.steering_vector.data(), steer->data, ggml_nbytes(steer));
+        memcpy(lctx.steering_vector.data() + n_past * n_embd * sizeof(float), steer->data, ggml_nbytes(steer));
     }
 
 
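This write-back mirrors the read-side copy earlier in the function and inherits the same caveat: the destination pointer is advanced by n_past * n_embd * sizeof(float), a byte count applied to what is presumably a float *, so the element-versus-byte question noted above applies here as well.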
Author: Henri Vasserman