mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-08 10:07:01 +00:00
falcon-h1 specific vocab resolved
This commit is contained in:
@@ -4882,9 +4882,6 @@ class Mamba2Model(TextModel):
|
|||||||
pad_vocab = self.hparams.get("pad_vocab_size_multiple", 16)
|
pad_vocab = self.hparams.get("pad_vocab_size_multiple", 16)
|
||||||
# pad using ceiling division
|
# pad using ceiling division
|
||||||
# ref: https://stackoverflow.com/a/17511341/22827863
|
# ref: https://stackoverflow.com/a/17511341/22827863
|
||||||
# if architecture is FalconH1, don't pad vocab size
|
|
||||||
if self.hparams.get("architectures", [None])[0] == "FalconH1ForCausalLM":
|
|
||||||
pad_vocab = 1
|
|
||||||
vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
|
vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
|
||||||
self.hparams["vocab_size"] = vocab_size
|
self.hparams["vocab_size"] = vocab_size
|
||||||
|
|
||||||
@@ -6590,6 +6587,9 @@ class FalconH1Model(Mamba2Model):
|
|||||||
keys = list(keys) + prefixed
|
keys = list(keys) + prefixed
|
||||||
return super().find_hparam(keys, *args, **kwargs)
|
return super().find_hparam(keys, *args, **kwargs)
|
||||||
|
|
||||||
|
def set_vocab(self):
|
||||||
|
self._set_vocab_gpt2()
|
||||||
|
|
||||||
def _generate_mup_vector(self, block_id: int) -> torch.Tensor:
|
def _generate_mup_vector(self, block_id: int) -> torch.Tensor:
|
||||||
zxbcdt_multipliers = self.hparams["ssm_multipliers"]
|
zxbcdt_multipliers = self.hparams["ssm_multipliers"]
|
||||||
intermediate_size = self.hparams["mamba_d_ssm"]
|
intermediate_size = self.hparams["mamba_d_ssm"]
|
||||||
|
|||||||
Reference in New Issue
Block a user