Merge branch 'master' into compilade/refactor-kv-cache

2025-11-02 09:12:03 +00:00 · 2025-07-03 16:03:56 -04:00
parent 908e6559d6 bee28421be
commit 4682e21c46
89 changed files with 895 additions and 4844 deletions
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -714,8 +714,8 @@ class GGUFWriter:
    def add_clamp_kqv(self, value: float) -> None:
        self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value)

-    def add_shared_kv_layers(self, value: float) -> None:
-        self.add_float32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)
+    def add_shared_kv_layers(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value)

    def add_sliding_window_pattern(self, value: Sequence[bool]) -> None:
        self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value)
--- a/gguf-py/gguf/vocab.py
+++ b/gguf-py/gguf/vocab.py
@@ -245,9 +245,18 @@ class SpecialVocab:
        if not tokenizer_config:
            return True
        chat_template_alt = None
-        chat_template_file = path / 'chat_template.json'
-        if chat_template_file.is_file():
-            with open(chat_template_file, encoding = 'utf-8') as f:
+        chat_template_json = path / 'chat_template.json'
+        chat_template_jinja = path / 'chat_template.jinja'
+        if chat_template_jinja.is_file():
+            with open(chat_template_jinja, encoding = 'utf-8') as f:
+                chat_template_alt = f.read()
+            if additional_templates := list((path / 'additional_chat_templates').glob('*.jinja')):
+                chat_template_alt = [{'name': 'default', 'template': chat_template_alt}]
+                for template_path in additional_templates:
+                    with open(template_path, encoding = 'utf-8') as fp:
+                        chat_template_alt.append({'name': template_path.stem, 'template': fp.read()})
+        elif chat_template_json.is_file():
+            with open(chat_template_json, encoding = 'utf-8') as f:
                chat_template_alt = json.load(f).get('chat_template')
        chat_template = tokenizer_config.get('chat_template', chat_template_alt)
        if chat_template is None or isinstance(chat_template, (str, list)):