Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-10-30 08:42:00 +00:00
	gguf-py, convert-hf : model conversion support for T5 and FLAN-T5 model variants (#5763)
* gguf-py : add T5 model architecture
* gguf-py : add separate tensors for encoder and decoder
* gguf-py : add new model header parameters: decoder_start_token_id, attention.relative_buckets_count, tokenizer.ggml.remove_extra_whitespaces, tokenizer.ggml.precompiled_charsmap
* convert-hf : add model conversion support for T5ForConditionalGeneration and T5WithLMHeadModel

---------

Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>
This commit is contained in:
		| @@ -24,6 +24,7 @@ class TensorNameMap: | ||||
|             "backbone.embedding",                        # mamba | ||||
|             "backbone.embeddings",                       # mamba-hf | ||||
|             "transformer.in_out_embed",                  # Grok | ||||
|             "shared",                                    # t5 | ||||
|         ), | ||||
|  | ||||
|         # Token type embeddings | ||||
| @@ -421,6 +422,120 @@ class TensorNameMap: | ||||
|         MODEL_TENSOR.FFN_SUB_NORM: ( | ||||
|             "model.layers.{bid}.mlp.ffn_layernorm",  # bitnet | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_ATTN_NORM: ( | ||||
|             "decoder.block.{bid}.layer.0.layer_norm", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_ATTN_Q: ( | ||||
|             "decoder.block.{bid}.layer.0.SelfAttention.q", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_ATTN_K: ( | ||||
|             "decoder.block.{bid}.layer.0.SelfAttention.k", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_ATTN_V: ( | ||||
|             "decoder.block.{bid}.layer.0.SelfAttention.v", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_ATTN_OUT: ( | ||||
|             "decoder.block.{bid}.layer.0.SelfAttention.o", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_ATTN_REL_B: ( | ||||
|             "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_CROSS_ATTN_NORM: ( | ||||
|             "decoder.block.{bid}.layer.1.layer_norm", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_CROSS_ATTN_Q: ( | ||||
|             "decoder.block.{bid}.layer.1.EncDecAttention.q", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_CROSS_ATTN_K: ( | ||||
|             "decoder.block.{bid}.layer.1.EncDecAttention.k", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_CROSS_ATTN_V: ( | ||||
|             "decoder.block.{bid}.layer.1.EncDecAttention.v", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_CROSS_ATTN_OUT: ( | ||||
|             "decoder.block.{bid}.layer.1.EncDecAttention.o", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: ( | ||||
|             "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_FFN_NORM: ( | ||||
|             "decoder.block.{bid}.layer.2.layer_norm", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_FFN_GATE: ( | ||||
|             "decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_FFN_UP: ( | ||||
|             "decoder.block.{bid}.layer.2.DenseReluDense.wi",   # t5 | ||||
|             "decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_FFN_DOWN: ( | ||||
|             "decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.DEC_OUTPUT_NORM: ( | ||||
|             "decoder.final_layer_norm", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_ATTN_NORM: ( | ||||
|             "encoder.block.{bid}.layer.0.layer_norm", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_ATTN_Q: ( | ||||
|             "encoder.block.{bid}.layer.0.SelfAttention.q", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_ATTN_K: ( | ||||
|             "encoder.block.{bid}.layer.0.SelfAttention.k", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_ATTN_V: ( | ||||
|             "encoder.block.{bid}.layer.0.SelfAttention.v", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_ATTN_OUT: ( | ||||
|             "encoder.block.{bid}.layer.0.SelfAttention.o", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_ATTN_REL_B: ( | ||||
|             "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_FFN_NORM: ( | ||||
|             "encoder.block.{bid}.layer.1.layer_norm", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_FFN_GATE: ( | ||||
|             "encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_FFN_UP: ( | ||||
|             "encoder.block.{bid}.layer.1.DenseReluDense.wi",   # t5 | ||||
|             "encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_FFN_DOWN: ( | ||||
|             "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ENC_OUTPUT_NORM: ( | ||||
|             "encoder.final_layer_norm", # t5 | ||||
|         ), | ||||
|     } | ||||
|  | ||||
|     # architecture-specific block mappings | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 fairydreaming
					fairydreaming