mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-03 09:22:01 +00:00)
* threadpool: skip polling for unused threads

  Currently all threads do N polling rounds even if only 1 thread is active
  (n_threads_cur == 1). This commit adds a check to skip the polling for unused
  threads (ith >= n_threads_cur). n_threads_cur is now an atomic_int to
  explicitly tell the thread sanitizer that it is written from one thread and
  read from other threads (not a race condition).

* threadpool: further simplify and improve ggml_barrier

  Avoid using strict memory order while polling, yet make sure that all threads
  go through a full memory barrier (memory fence) on ggml_barrier entrance and
  exit.

* threads: add simple barrier test

  This test does lots of small, parallel matmul ops where the barriers in
  between dominate the overhead.

* threadpool: improve thread sync for new-graphs

  Using the same tricks as ggml_barrier: all the polling is done with relaxed
  memory order to keep it efficient; once the new graph is detected we do a
  full fence using a read-modify-write with strict memory order.

* threadpool: improve abort handling

  Do not use threadpool->ec (exit code) to decide whether to exit the compute
  loop. threadpool->ec is not atomic, which makes the thread sanitizer
  rightfully unhappy about it. Instead, introduce an atomic threadpool->abort
  flag for this purpose. This is consistent with how we handle
  threadpool->stop or pause. While at it, add an explicit atomic_load for
  n_threads_cur for consistency.

* test-barrier: release threadpool before releasing the context

  Fixes a use-after-free detected by the gcc thread sanitizer on x86-64; for
  some reason the llvm sanitizer does not detect this issue.
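To make the scheme above concrete, below is a minimal, self-contained sketch of the relaxed-polling barrier, the skip for unused threads, and the atomic abort handling. It is an illustration under stated assumptions, not the actual ggml implementation: threadpool_t, barrier(), compute_thread(), and the node-indexed abort sentinel are simplified stand-ins for the real threadpool state in ggml.c.

/*
 * Minimal sketch of the barrier scheme described above (C11 atomics +
 * pthreads). Illustration only, NOT the actual ggml code: threadpool_t,
 * barrier(), and compute_thread() are simplified stand-ins.
 *
 * Build: cc -std=c11 -pthread barrier-sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define N_THREADS 4
#define N_NODES   8

typedef struct {
    atomic_int n_barrier;     // threads that have arrived at the barrier
    atomic_int n_passed;      // barrier generation counter
    atomic_int n_threads_cur; // threads active for the current graph
    atomic_int abort;         // index of the first node NOT to run, -1 = none
} threadpool_t;

static void barrier(threadpool_t * tp, int ith) {
    const int n_threads = atomic_load_explicit(&tp->n_threads_cur, memory_order_relaxed);

    // unused threads (ith >= n_threads_cur) skip the polling entirely
    if (ith >= n_threads || n_threads == 1) {
        return;
    }

    const int passed_old = atomic_load_explicit(&tp->n_passed, memory_order_relaxed);

    // full memory barrier on entry: read-modify-write with strict memory order
    if (atomic_fetch_add_explicit(&tp->n_barrier, 1, memory_order_seq_cst) == n_threads - 1) {
        // last thread to arrive: reset the counter and release the waiters,
        // again with a full fence via a seq_cst read-modify-write
        atomic_store_explicit(&tp->n_barrier, 0, memory_order_relaxed);
        atomic_fetch_add_explicit(&tp->n_passed, 1, memory_order_seq_cst);
        return;
    }

    // poll with relaxed loads to keep the hot loop cheap ...
    while (atomic_load_explicit(&tp->n_passed, memory_order_relaxed) == passed_old) {
        // spin
    }
    // ... then run a full fence on exit, so every write made by the other
    // threads before the barrier is visible after it
    atomic_thread_fence(memory_order_seq_cst);
}

typedef struct {
    threadpool_t * tp;
    int            ith;
} worker_t;

static void * compute_thread(void * arg) {
    worker_t     * w  = arg;
    threadpool_t * tp = w->tp;

    // the abort sentinel is a node index: a thread that races ahead and sees
    // the new value at an earlier node still enters that node and reaches its
    // barrier, so no thread can abandon a barrier that others will wait on
    for (int node = 0;
         node < N_NODES && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node;
         node++) {
        // ... compute graph node here (omitted) ...

        if (w->ith == 0 && node == 2) {
            // simulate a failure: stop everyone before node 3
            atomic_store_explicit(&tp->abort, node + 1, memory_order_relaxed);
        }

        barrier(tp, w->ith); // makes the abort store visible to all threads
    }
    return NULL;
}

int main(void) {
    threadpool_t tp;
    atomic_store(&tp.n_barrier, 0);
    atomic_store(&tp.n_passed, 0);
    atomic_store(&tp.n_threads_cur, N_THREADS);
    atomic_store(&tp.abort, -1);

    pthread_t threads[N_THREADS];
    worker_t  workers[N_THREADS];
    for (int i = 0; i < N_THREADS; i++) {
        workers[i] = (worker_t) { .tp = &tp, .ith = i };
        pthread_create(&threads[i], NULL, compute_thread, &workers[i]);
    }
    for (int i = 0; i < N_THREADS; i++) {
        pthread_join(threads[i], NULL);
    }
    printf("all threads exited, abort = %d\n", atomic_load(&tp.abort));
    return 0;
}

The new-graph synchronization mentioned in the commit follows the same pattern: poll a generation counter with relaxed loads, then publish the new graph with a seq_cst read-modify-write so the workers pick it up behind a full fence.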
		
			
				
	
	
		
141 lines · 7.2 KiB · CMake
function(llama_test target)
    include(CMakeParseArguments)
    set(options)
    set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
    set(multiValueArgs ARGS)
    cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

    if (NOT DEFINED LLAMA_TEST_LABEL)
        set(LLAMA_TEST_LABEL "main")
    endif()
    if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
        set(LLAMA_TEST_WORKING_DIRECTORY .)
    endif()
    if (DEFINED LLAMA_TEST_NAME)
        set(TEST_NAME ${LLAMA_TEST_NAME})
    else()
        set(TEST_NAME ${target})
    endif()

    set(TEST_TARGET ${target})

    add_test(
        NAME ${TEST_NAME}
        WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
        COMMAND $<TARGET_FILE:${TEST_TARGET}>
        ${LLAMA_TEST_ARGS})

    set_property(TEST ${TEST_NAME} PROPERTY LABELS ${LLAMA_TEST_LABEL})
endfunction()

# Builds and runs a test source file.
# Optional args:
# - NAME: name of the executable & test target (defaults to the source file name without extension)
# - LABEL: label for the test (defaults to main)
# - ARGS: arguments to pass to the test executable
# - WORKING_DIRECTORY
function(llama_target_and_test source)
    include(CMakeParseArguments)
    set(options)
    set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
    set(multiValueArgs ARGS)
    cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

    if (NOT DEFINED LLAMA_TEST_LABEL)
        set(LLAMA_TEST_LABEL "main")
    endif()
    if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
        set(LLAMA_TEST_WORKING_DIRECTORY .)
    endif()
    if (DEFINED LLAMA_TEST_NAME)
        set(TEST_TARGET ${LLAMA_TEST_NAME})
    else()
        get_filename_component(TEST_TARGET ${source} NAME_WE)
    endif()

    add_executable(${TEST_TARGET} ${source} get-model.cpp)
    install(TARGETS ${TEST_TARGET} RUNTIME)
    target_link_libraries(${TEST_TARGET} PRIVATE common)
    add_test(
        NAME ${TEST_TARGET}
        WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
        COMMAND $<TARGET_FILE:${TEST_TARGET}>
        ${LLAMA_TEST_ARGS})

    set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL})
endfunction()

# build test-tokenizer-0 target once and add many tests
add_executable(test-tokenizer-0 test-tokenizer-0.cpp)
target_link_libraries(test-tokenizer-0 PRIVATE common)
install(TARGETS test-tokenizer-0 RUNTIME)

llama_test(test-tokenizer-0 NAME test-tokenizer-0-bert-bge          ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bert-bge.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r         ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-command-r.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder    ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm      ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon            ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2             ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe         ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm         ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-mpt               ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-phi-3             ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-phi-3.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2             ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-qwen2.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact            ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder         ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)

# build test-tokenizer-1-bpe target once and add many tests
add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
install(TARGETS test-tokenizer-1-bpe RUNTIME)

# TODO: disabled due to slowness
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila    ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon    ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2     ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox  ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt       ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact    ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)

# build test-tokenizer-1-spm target once and add many tests
add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
target_link_libraries(test-tokenizer-1-spm PRIVATE common)
install(TARGETS test-tokenizer-1-spm RUNTIME)

llama_test(test-tokenizer-1-spm  NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
#llama_test(test-tokenizer-1-spm  NAME test-tokenizer-1-baichuan  ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)

# llama_target_and_test(test-double-float.cpp) # SLOW
llama_target_and_test(test-log.cpp)
llama_target_and_test(test-arg-parser.cpp)
llama_target_and_test(test-quantize-fns.cpp)
llama_target_and_test(test-quantize-perf.cpp)
llama_target_and_test(test-sampling.cpp)
llama_target_and_test(test-chat-template.cpp)

llama_target_and_test(test-grammar-parser.cpp)
llama_target_and_test(test-llama-grammar.cpp)
llama_target_and_test(test-grammar-integration.cpp)
llama_target_and_test(test-grad0.cpp)
llama_target_and_test(test-barrier.cpp)
# llama_target_and_test(test-opt.cpp) # SLOW
llama_target_and_test(test-backend-ops.cpp)

llama_target_and_test(test-rope.cpp)

llama_target_and_test(test-model-load-cancel.cpp  LABEL "model")
llama_target_and_test(test-autorelease.cpp        LABEL "model")

# TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
    llama_target_and_test(test-json-schema-to-grammar.cpp   WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
    target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
endif()

# dummy executable - not installed
get_filename_component(TEST_TARGET test-c.c NAME_WE)
add_executable(${TEST_TARGET} test-c.c)
target_link_libraries(${TEST_TARGET} PRIVATE llama)