Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-04 09:32:00 +00:00)
			
		
		
		
	Steer with inpSA instead of with inpL
Signed-off-by: Henri Vasserman <henv@hot.ee>
This commit is contained in:
		@@ -176,28 +176,27 @@ int main(int argc, char ** argv) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    if (!params.steering_add.empty() || !params.steering_sub.empty())
 | 
					    if (!params.steering_add.empty() || !params.steering_sub.empty())
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        params.steering_add.insert(0, 1, ' ');
 | 
					 | 
				
			||||||
        params.steering_sub.insert(0, 1, ' ');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        auto add_tokens = ::llama_tokenize(ctx, params.steering_add, true);
 | 
					        auto add_tokens = ::llama_tokenize(ctx, params.steering_add, true);
 | 
				
			||||||
        auto sub_tokens = ::llama_tokenize(ctx, params.steering_sub, true);
 | 
					        auto sub_tokens = ::llama_tokenize(ctx, params.steering_sub, true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        //if (add_tokens.size() != sub_tokens.size()) {
 | 
					
 | 
				
			||||||
        //    while (add_tokens.size() < sub_tokens.size()) {
 | 
					        if (add_tokens.size() != sub_tokens.size()) {
 | 
				
			||||||
        //        add_tokens.push_back(llama_token_nl());
 | 
					           while (add_tokens.size() < sub_tokens.size()) {
 | 
				
			||||||
        //    }
 | 
					               add_tokens.push_back(llama_token_nl());
 | 
				
			||||||
        //    while (sub_tokens.size() < add_tokens.size()) {
 | 
					           }
 | 
				
			||||||
        //        sub_tokens.push_back(llama_token_nl());
 | 
					           while (sub_tokens.size() < add_tokens.size()) {
 | 
				
			||||||
        //    }
 | 
					               sub_tokens.push_back(llama_token_nl());
 | 
				
			||||||
        //}
 | 
					           }
 | 
				
			||||||
        //const int N = embd_inp.size();
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        llama_set_steering_write(ctx, params.steering_source, +1.0f);
 | 
					        llama_set_steering_write(ctx, params.steering_source, +1.0f);
 | 
				
			||||||
        llama_eval(ctx, add_tokens.data(), std::min((int)add_tokens.size(), n_ctx), 0, params.n_threads);
 | 
					        llama_eval(ctx, add_tokens.data(), std::min((int)add_tokens.size(), n_ctx), 0, params.n_threads);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        llama_set_steering_write(ctx, params.steering_layer, -1.0f);
 | 
					        llama_set_steering_write(ctx, params.steering_source, -1.0f);
 | 
				
			||||||
        llama_eval(ctx, sub_tokens.data(), std::min((int)sub_tokens.size(), n_ctx), 0, params.n_threads);
 | 
					        llama_eval(ctx, sub_tokens.data(), std::min((int)sub_tokens.size(), n_ctx), 0, params.n_threads);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        llama_set_steering_read(ctx, params.steering_layer, params.steering_mul);
 | 
					        llama_set_steering_read(ctx, params.steering_layer, params.steering_mul);
 | 
				
			||||||
 | 
						std::cout << "Steering: `" << params.steering_add << "` - `" << params.steering_sub << "` * " << params.steering_mul << "\n";
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // debug message about similarity of saved session, if applicable
 | 
					    // debug message about similarity of saved session, if applicable
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -32,6 +32,7 @@
 | 
				
			|||||||
#include <mutex>
 | 
					#include <mutex>
 | 
				
			||||||
#include <sstream>
 | 
					#include <sstream>
 | 
				
			||||||
#include <numeric>
 | 
					#include <numeric>
 | 
				
			||||||
 | 
					#include <iostream>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define LLAMA_USE_SCRATCH
 | 
					#define LLAMA_USE_SCRATCH
 | 
				
			||||||
#define LLAMA_MAX_SCRATCH_BUFFERS 16
 | 
					#define LLAMA_MAX_SCRATCH_BUFFERS 16
 | 
				
			||||||
@@ -1187,8 +1188,8 @@ static bool llama_eval_internal(
 | 
				
			|||||||
                    ggml_add(ctx0, ggml_scale(ctx0, inpL, scal), steer), steer));
 | 
					                    ggml_add(ctx0, ggml_scale(ctx0, inpL, scal), steer), steer));
 | 
				
			||||||
                break;
 | 
					                break;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            
 | 
					            // std::cout << "\nAdding steering vector to inpL " << il << "\n";
 | 
				
			||||||
            inpL = ggml_add(ctx0, ggml_scale(ctx0, steer, scal), inpL);
 | 
					            inpSA = ggml_add(ctx0, ggml_scale(ctx0, steer, scal), inpSA);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // norm
 | 
					        // norm
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user