lookahead : add example for lookahead decoding (#4207)

* lookahead : init * lookahead : generate and store n-grams * lookahead : use loop instead recursion to generate n-grams * lookahead : initial working implementation * lookahead : filter repeating n-grams * lookahead : use deterministic init * lookahead : add to Makefile * lookahead : fix a bug in the seq_id of the lookahead tokens * lookahead : add comments --------- Co-authored-by: slaren <slarengh@gmail.com>
2025-11-18 11:46:58 +00:00 · 2023-11-26 20:33:07 +02:00
parent 22da05536f
commit 922754a8d6
5 changed files with 498 additions and 1 deletions
--- a/5
+++ b/5
@@ -2,7 +2,7 @@
 BUILD_TARGETS = \
 	main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
 	simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search  \
-	speculative infill tokenize benchmark-matmult parallel finetune export-lora tests/test-c.o
+	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead tests/test-c.o

 # Binaries only useful for tests
 TEST_TARGETS = \
@@ -657,6 +657,9 @@ speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS)
 parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

+lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 ifdef LLAMA_METAL
 metal: examples/metal/metal.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)