mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	gguf : rm references to old file formats
This commit is contained in:
		@@ -467,13 +467,11 @@ struct llama_load_tensors_map {
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
enum gguf_file_version {
 | 
			
		||||
    gguf_file_VERSION_GGML,
 | 
			
		||||
    gguf_file_VERSION_GGMF_V1, // added version field and scores in vocab
 | 
			
		||||
    gguf_file_VERSION_GGJT_V1, // added padding
 | 
			
		||||
    gguf_file_VERSION_GGJT_V2, // changed quantization format
 | 
			
		||||
    gguf_file_VERSION_GGJT_V3, // changed Q4 and Q8 quantization format
 | 
			
		||||
    GGUF_FILE_VERSION_V1
 | 
			
		||||
    
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
struct gguf_file_loader {
 | 
			
		||||
    gguf_file file;
 | 
			
		||||
    gguf_context * gguf_ctx;
 | 
			
		||||
@@ -1069,11 +1067,7 @@ int64_t llama_time_us() {
 | 
			
		||||
 | 
			
		||||
static const char *gguf_file_version_name(gguf_file_version version) {
 | 
			
		||||
    switch (version) {
 | 
			
		||||
        case gguf_file_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
 | 
			
		||||
        case gguf_file_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
 | 
			
		||||
        case gguf_file_VERSION_GGJT_V1: return "ggjt v1 (pre #1405)";
 | 
			
		||||
        case gguf_file_VERSION_GGJT_V2: return "ggjt v2 (pre #1508)";
 | 
			
		||||
        case gguf_file_VERSION_GGJT_V3: return "ggjt v3 (latest)";
 | 
			
		||||
        case GGUF_FILE_VERSION_V1: return "GGUF V1 (latest)";
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    return "unknown";
 | 
			
		||||
@@ -1205,21 +1199,11 @@ static void llama_model_load_internal(
 | 
			
		||||
        fprintf(stderr, "%s: model size = %s\n",   __func__, llama_model_type_name(model.type));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (file_version < gguf_file_VERSION_GGJT_V2) {
 | 
			
		||||
        if (hparams.ftype != LLAMA_FTYPE_ALL_F32     &&
 | 
			
		||||
            hparams.ftype != LLAMA_FTYPE_MOSTLY_F16  &&
 | 
			
		||||
            hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
 | 
			
		||||
            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)"));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (file_version < gguf_file_VERSION_GGJT_V3) {
 | 
			
		||||
    if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
 | 
			
		||||
        hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
 | 
			
		||||
        hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
 | 
			
		||||
        throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)"));
 | 
			
		||||
    }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    if (vocab_only) {
 | 
			
		||||
        return;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user