mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	CANN: Update CANN model support (#13162)
* Update CANN model support status * Update of model support * update * update * update * fix format of CANN.md * fix format of CANN.md * fix format of CANN.md
This commit is contained in:
		| @@ -56,60 +56,82 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilizes the abi | |||||||
|  |  | ||||||
| ## Model Supports | ## Model Supports | ||||||
|  |  | ||||||
| | Model Name                  | FP16  | Q8_0 | Q4_0 | | | Model Name                  | FP16  | Q4_0 | Q8_0 | | ||||||
| |:----------------------------|:-----:|:----:|:----:| | |:----------------------------|:-----:|:----:|:----:| | ||||||
| | AquilaChat2-7B              |   √   |   √  |   √  | | | Llama-2                     |   √   |   √  |   √  | | ||||||
| | Baichuan-7b                 |   √   |   √  |   √  | | | Llama-3                     |   √   |   √  |   √  | | ||||||
| | Baichuan2-7B-Chat           |   √   |   √  |   √  | | | Mistral-7B                  |   √   |   √  |   √  | | ||||||
| | bitnet_b1_58-large          |   √   |   √  |   √  | | | Mistral MOE                 |   √   |   √  |   √  | | ||||||
| | bloom-560m                  |   √   |   x  |   √  | | | DBRX                        |   -   |   -  |   -  | | ||||||
| | bloomz-alpaca-560m          |   √   |   x  |   √  | | | Falcon                      |   √   |   √  |   √  | | ||||||
| | c4ai-command-r-35B-v01      |   x   |   x  |   x  | | | Chinese LLaMA/Alpaca        |   √   |   √  |   √  | | ||||||
| | chatglm3-6B                 |   x   |   x  |   x  | | | Vigogne(French)             |   √   |   √  |   √  | | ||||||
| | chinese-alpaca-2-1.3b       |   √   |   √  |   √  | | | BERT                        |   x   |   x  |   x  | | ||||||
| | CodeShell-7B                |   √   |   √  |   √  | | | Koala                       |   √   |   √  |   √  | | ||||||
| | deepseek-ai_deepseek-coder-1.3B-base | x |   x  |   x  | | | Baichuan                    |   √   |   √  |   √  | | ||||||
| | deepseek-ai_DeepSeek-V2-Lite | x   |   x  |   x   | | | Aquila 1 & 2                |   √   |   √  |   √  | | ||||||
| | deepseek-coder-6.7B-instruct | x   |   x  |   x   | | | Starcoder models            |   √   |   √  |   √  | | ||||||
| | DeepSeek-V2-Lite-64x1.5B    |   x   |   x  |   x  | | | Refact                      |   √   |   √  |   √  | | ||||||
| | falcon-7b-instruct          |   √   |   √  |   √  | | | MPT                         |   √   |   √  |   √  | | ||||||
| | flan-t5-large               |   √   |   √  |   √  | | | Bloom                       |   √   |   √  |   √  | | ||||||
| | gemma-2-9b-it               |   √   |   √  |   √  | | | Yi models                   |   √   |   √  |   √  | | ||||||
| | glm-4-9B                    |   x   |   x  |   x  | | | stablelm models             |   √   |   √  |   √  | | ||||||
| | gpt2                        |   √   |   √  |   √  | | | DeepSeek models             |   x   |   x  |   x  | | ||||||
| | Gpt2-163M                   |   √   |   √  |   √  | | | Qwen models                 |   √   |   √  |   √  | | ||||||
| | granite-3B-code-instruct    |   √   |   √  |   √  | | | PLaMo-13B                   |   √   |   √  |   √  | | ||||||
|  | | Phi models                  |   √   |   √  |   √  | | ||||||
|  | | PhiMoE                      |   √   |   √  |   √  | | ||||||
|  | | GPT-2                       |   √   |   √  |   √  | | ||||||
|  | | Orion                       |   √   |   √  |   √  | | ||||||
|  | | InternLM2                   |   √   |   √  |   √  | | ||||||
|  | | CodeShell                   |   √   |   √  |   √  | | ||||||
|  | | Gemma                       |   √   |   √  |   √  | | ||||||
|  | | Mamba                       |   √   |   √  |   √  | | ||||||
|  | | Xverse                      |   √   |   √  |   √  | | ||||||
|  | | command-r models            |   √   |   √  |   √  | | ||||||
|  | | Grok-1                      |   -   |   -  |   -  | | ||||||
|  | | SEA-LION                    |   √   |   √  |   √  | | ||||||
| | GritLM-7B                   |   √   |   √  |   √  | | | GritLM-7B                   |   √   |   √  |   √  | | ||||||
| | internlm2_5-7b-chat         |   √   |   √  |   √  | | | OLMo                        |   √   |   √  |   √  | | ||||||
| | koala-7B-HF                 |   √   |   √  |   √  | | | OLMo 2                      |   √   |   √  |   √  | | ||||||
| | Llama-2-7b-chat-hf          |   √   |   √  |   √  | | | OLMoE                       |   √   |   √  |   √  | | ||||||
| | Llama-3-Smaug-8B            |   √   |   √  |   √  | | | Granite models              |   √   |   √  |   √  | | ||||||
| | Llama2-Chinese-7b-Chat      |   √   |   √  |   √  | | | GPT-NeoX                    |   √   |   √  |   √  | | ||||||
| | Llama3-8B                   |   √   |   √  |   √  | | | Pythia                      |   √   |   √  |   √  | | ||||||
| | Llama3-8b-chinese           |   √   |   √  |   √  | | | Snowflake-Arctic MoE        |   -   |   -  |   -  | | ||||||
| | mamba-130m-hf               |   √   |   √  |   √  | | | Smaug                       |   √   |   √  |   √  | | ||||||
| | Mistral-7B-Instruct-v0.2    |   √   |   √  |   √  | | | Poro 34B                    |   √   |   √  |   √  | | ||||||
| | Mixtral-8x7B-Instruct-v0.1  |   x   |   √  |   √  | | | Bitnet b1.58 models         |   √   |   x  |   x  | | ||||||
| | mpt-7B                      |   √   |   √  |   √  | | | Flan-T5                     |   √   |   √  |   √  | | ||||||
| | OLMo-1B-hf                  |   √   |   √  |   √  | | | Open Elm models             |   x   |   √  |   √  | | ||||||
| | OpenELM-3B-Instruct         |   √   |   √  |   √  | | | chatGLM3-6B + ChatGLM4-9b +  GLMEdge-1.5b + GLMEdge-4b    |   √   |   √  |   √  | | ||||||
| | Orion-14b-base              |   √   |   √  |   √  | | | GLM-4-0414                  |   √   |   √  |   √  | | ||||||
| | phi1                        |   x   |   x  |   x  | | | SmolLM                      |   √   |   √  |   √  | | ||||||
| | phi2                        |   x   |   x  |   x  | | | EXAONE-3.0-7.8B-Instruct    |   √   |   √  |   √  | | ||||||
| | Phi-3-mini-4k-instruct      |   √   |   √  |   √  | | | FalconMamba Models          |   √   |   √  |   √  | | ||||||
| | plamo-13b                   |   √   |   √  |   √  | | | Jais Models                 |   -   |   x  |   x  | | ||||||
| | pythia-70M                  |   x   |   x  |   x  | | | Bielik-11B-v2.3             |   √   |   √  |   √  | | ||||||
| | Qwen-7B                     |   √   |   √  |   √  | | | RWKV-6                      |   -   |   √  |   √  | | ||||||
| | Qwen2-1.5B-Instruct         |   √   |   x  |   √  | | | QRWKV-6                     |   √   |   √  |   √  | | ||||||
| | Refact-1_6B-fim             |   √   |   √  |   √  | | | GigaChat-20B-A3B            |   x   |   x  |   x  | | ||||||
| | SmolLM-135M                 |   √   |   √  |   √  | | | Trillion-7B-preview         |   √   |   √  |   √  | | ||||||
| | stablelm-zephyr             |   x   |   x  |   x  | | | Ling models                 |   √   |   √  |   √  | | ||||||
| | stablelm-2-zephyr-1_6b      |   x   |   x  |   x  | |  | ||||||
| | starcoderbase-1b            |   √   |   √  |   √  | |  | ||||||
| | starcoder2-3b               |   √   |   √  |   √  | | **Multimodal** | ||||||
| | vigogne-7b-chat             |   √   |   √  |   √  | | | Model Name                  | FP16  | Q4_0 | Q8_0 | | ||||||
| | xverse-7b-chat              |   √   |   √  |   √  | | |:----------------------------|:-----:|:----:|:----:| | ||||||
| | Yi-6b-Chat                  |   √   |   √  |   √  | | | LLaVA 1.5 models, LLaVA 1.6 models      |   x   |   x  |   x  | | ||||||
|  | |  BakLLaVA                   |   √   |   √  |   √  | | ||||||
|  | |  Obsidian                   |   √   |   -  |   -  | | ||||||
|  | |  ShareGPT4V                 |   x   |   -  |   -  | | ||||||
|  | |  MobileVLM 1.7B/3B models   |   -   |   -  |   -  | | ||||||
|  | |  Yi-VL                      |   -   |   -  |   -  | | ||||||
|  | |  Mini CPM                   |   √   |   √  |   √  | | ||||||
|  | |  Moondream                  |   √   |   √  |   √  | | ||||||
|  | |  Bunny                      |   √   |   -  |   -  | | ||||||
|  | |  GLM-EDGE                   |   √   |   √  |   √  | | ||||||
|  | |  Qwen2-VL                   |   √   |   √  |   √  | | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Xinpeng Dou
					Xinpeng Dou