mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	* Arm AArch64: optimized GEMV and GEMM kernels for q4_0_q8_0, and q8_0_q8_0 quantization
* Arm AArch64: add optimized GEMV and GEMM asm kernels for q4_0_q8_0 quantization and refactor code to address llama.cpp pr#5780 suggestions
* Arm AArch64: add optimized GEMV and GEMM asm kernels for q4_0_q8_0 quantization and refactor code to address llama.cpp pr#5780 suggestions
* Arm AArch64: add optimized GEMV and GEMM asm kernels for q4_0_q8_0 quantization and refactor code to address llama.cpp pr#5780 suggestions
* Arm AArch64: add optimized GEMV and GEMM asm kernels for q4_0_q8_0 quantization and refactor code to address llama.cpp pr#5780 suggestions
* Arm AArch64: add copyright claim only to ggml-aarch64.cpp and ggml-aarch64.h files
* Arm AArch64: minor code refactoring for rebase
* Arm AArch64: minor code refactoring for resolving a build issue with cmake
* Arm AArch64: minor code refactoring to split the Q4_0_AARC64 type into three separate types: Q4_0_4_4, Q4_0_4_8, and Q4_0_8_8
* Arm AArch64: minor code change for resolving a build issue with server-windows
* retrigger checks
* Arm AArch64: minor code changes for rebase
* Arm AArch64: minor changes to skip the pr#7433 vec_dot code for arm cpus with SVE VL not equal to 256 bits
* Arm AArch64: remove stale LLAMA_QKK_64 from CMakeLists.txt and delete build.zig
* Arm AArch64: add reference scalar gemm and gemv, and avoid dynamic memory allocations during quantization for Q4_0_4_4, Q4_0_4_8, and Q4_0_8_8
* Arm AArch64: add multithreaded quantization support for the new types: Q4_0_4_4, Q4_0_4_8, and Q4_0_8_8
* Arm AArch64: minor code refactoring
* Arm AArch64: simplify logic for calling gemm and gemv functions in ggml_compute_forward_mul_mat
* Arm AArch64: minimize changes in ggml_compute_forward_mul_mat
* Arm AArch64: minor code refactoring, and add reference scalar code to quantize routines for new quant types
* Arm AArch64: minor code refactoring
* Arm AArch64: minor code refactoring
* Arm AArch64: minor code refactoring
* rebase on the latest master commit 3fd62a6 and adapt to the new directory structure
* Arm AArch64: remove a redundant comment
* Arm AArch64: add pragma in ggml-aarch64.c to turn -Woverlength-strings warning off
* Arm AArch64: use __aarch64__ check to guard 64-bit neon kernels
* Arm AArch64: update docs/build.md README to include compile time flags for building the Q4_0_4_4 quant type
		
	
		
			
				
	
	
		
			77 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Swift
		
	
	
	
	
	
			
		
		
	
	
			77 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Swift
		
	
	
	
	
	
// swift-tools-version:5.5

import PackageDescription

// C/C++ translation units compiled into the `llama` target on every platform.
// Platform-specific sources (Metal) are appended below under `#if canImport(Darwin)`.
var sources = [
    "src/llama.cpp",
    "src/unicode.cpp",
    "src/unicode-data.cpp",
    "ggml/src/ggml.c",
    "ggml/src/ggml-alloc.c",
    "ggml/src/ggml-backend.c",
    "ggml/src/ggml-quants.c",
    "ggml/src/ggml-aarch64.c",
]

var resources: [Resource] = []
var linkerSettings: [LinkerSetting] = []
var cSettings: [CSetting] = [
    // -Wno-shorten-64-to-32: the C sources intentionally narrow 64-bit values;
    // -O3/-DNDEBUG: release-style optimization even in SwiftPM debug builds.
    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
    // The Objective-C source (ggml-metal.m) manages its own lifetimes; disable ARC.
    .unsafeFlags(["-fno-objc-arc"]),
    // NOTE: NEW_LAPACK requires iOS 16.4+.
    // We should consider adding this in the future when we drop support for iOS 14.
    // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
    // .define("ACCELERATE_NEW_LAPACK"),
    // .define("ACCELERATE_LAPACK_ILP64")
]

#if canImport(Darwin)
// Apple platforms: enable the Metal backend and the Accelerate BLAS path.
sources.append("ggml/src/ggml-metal.m")
resources.append(.process("ggml/src/ggml-metal.metal"))
linkerSettings.append(.linkedFramework("Accelerate"))
cSettings.append(
    contentsOf: [
        .define("GGML_USE_ACCELERATE"),
        .define("GGML_USE_METAL")
    ]
)
#endif

#if os(Linux)
    // Expose GNU extensions (e.g. for CPU affinity APIs used by the C sources).
    cSettings.append(.define("_GNU_SOURCE"))
#endif

let package = Package(
    name: "llama",
    platforms: [
        .macOS(.v12),
        .iOS(.v14),
        .watchOS(.v4),
        .tvOS(.v14)
    ],
    products: [
        .library(name: "llama", targets: ["llama"]),
    ],
    targets: [
        .target(
            name: "llama",
            path: ".",
            // Keep non-library parts of the repository out of the SwiftPM build.
            exclude: [
               "cmake",
               "examples",
               "scripts",
               "models",
               "tests",
               "CMakeLists.txt",
               "Makefile"
            ],
            sources: sources,
            resources: resources,
            publicHeadersPath: "spm-headers",
            cSettings: cSettings,
            linkerSettings: linkerSettings
        )
    ],
    // The C++ sources (llama.cpp, unicode.cpp) are built as C++11.
    cxxLanguageStandard: .cxx11
)