From 6597bcbceceb28deb1973b7aee42152fcb6a58d2 Mon Sep 17 00:00:00 2001 From: limloop Date: Fri, 15 May 2026 04:04:52 +0300 Subject: [PATCH 1/5] mamba2: remove hardcoded 2x expansion factor, support any expand value --- convert_hf_to_gguf.py | 8 +++----- src/models/mamba2.cpp | 13 +++++++------ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 5cff86848565..b449841b493a 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -8552,7 +8552,8 @@ def __init__(self, dir_model: Path, *args, **kwargs): hparams["text_config"] = hparams["llm_config"] super().__init__(dir_model, *args, hparams=hparams, **kwargs) self.d_model = self.find_hparam(["hidden_size", "d_model", "dim"]) - self.d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or 2 * self.d_model + self.expand = self.find_hparam(["expand"]) + self.d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or self.expand * self.d_model self.n_group = self.find_hparam(["n_groups"], optional=True) or 1 def set_vocab(self): @@ -8582,11 +8583,8 @@ def set_gguf_parameters(self): rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5 - # Fail early for models which don't have a block expansion factor of 2 - # TODO: does this really matter? 
- # skip the assertion for FalconH1 Model if self.model_arch != gguf.MODEL_ARCH.FALCON_H1: - assert self.d_inner == 2 * self.d_model + assert self.d_inner == self.expand * self.d_model assert self.d_inner % head_dim == 0 self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default diff --git a/src/models/mamba2.cpp b/src/models/mamba2.cpp index 3277ca53ec4b..87eefccb4295 100644 --- a/src/models/mamba2.cpp +++ b/src/models/mamba2.cpp @@ -39,10 +39,11 @@ void llama_model_mamba2::load_arch_tensors(llama_model_loader &) { const int64_t d_inner = hparams.ssm_d_inner; const int64_t d_state = hparams.ssm_d_state; const int64_t n_group = hparams.ssm_n_group; - const int64_t d_in_proj = 2*d_inner + 2*n_group*d_state + n_head; + const int64_t dt_rank = hparams.ssm_dt_rank; + + const int64_t conv_dim = d_inner + 2 * n_group * d_state; + const int64_t d_in_proj = d_inner + conv_dim + dt_rank; - // only an expansion factor of 2 is supported for now - GGML_ASSERT(2 * n_embd == d_inner); tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); @@ -68,11 +69,11 @@ void llama_model_mamba2::load_arch_tensors(llama_model_loader &) { layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {d_conv, d_inner + 2*n_group*d_state}, 0); layer.ssm_conv1d_b = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "bias", i), {d_inner + 2*n_group*d_state}, 0); - layer.ssm_dt_b = create_tensor(tn(LLM_TENSOR_SSM_DT, "bias", i), {n_head}, 0); + layer.ssm_dt_b = create_tensor(tn(LLM_TENSOR_SSM_DT, "bias", i), {dt_rank}, 0); // no "weight" suffix for these - layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), {1, n_head}, 0); - layer.ssm_d = create_tensor(tn(LLM_TENSOR_SSM_D, i), {1, n_head}, 0); + layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), {1, dt_rank}, 0); + layer.ssm_d = create_tensor(tn(LLM_TENSOR_SSM_D, i), {1, dt_rank}, 0); layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), {d_inner / n_group, n_group}, 
0); From 59b0a730794ccb9d758181bd198cc3187e2c37d0 Mon Sep 17 00:00:00 2001 From: limloop Date: Fri, 15 May 2026 04:05:07 +0300 Subject: [PATCH 2/5] mamba2: remove invalid d_inner % d_state check (unrelated parameters) --- src/models/mamba-base.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/models/mamba-base.cpp b/src/models/mamba-base.cpp index c37f29c487ed..fd3fe3f03230 100644 --- a/src/models/mamba-base.cpp +++ b/src/models/mamba-base.cpp @@ -169,7 +169,6 @@ ggml_tensor * llm_build_mamba_base::build_mamba2_layer(llm_graph_input_rs * inp, GGML_ASSERT(ubatch.equal_seqs()); GGML_ASSERT(ubatch.n_tokens == n_seq_tokens * n_seqs); GGML_ASSERT(d_inner % n_head == 0); - GGML_ASSERT(d_inner % d_state == 0); GGML_ASSERT(d_inner % n_group == 0); ggml_tensor * conv_states_all = mctx_cur->get_r_l(il); From 4c49edbb8838eb0edfc284ccf896edc0aff20e28 Mon Sep 17 00:00:00 2001 From: Arsen Arutunan <58118221+limloop@users.noreply.github.com> Date: Fri, 15 May 2026 19:18:26 +0300 Subject: [PATCH 3/5] Update convert_hf_to_gguf.py: make expand optional with default 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sigbjørn Skjæret --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index b449841b493a..1f1e20cff461 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -8552,7 +8552,7 @@ def __init__(self, dir_model: Path, *args, **kwargs): hparams["text_config"] = hparams["llm_config"] super().__init__(dir_model, *args, hparams=hparams, **kwargs) self.d_model = self.find_hparam(["hidden_size", "d_model", "dim"]) - self.expand = self.find_hparam(["expand"]) + self.expand = self.find_hparam(["expand"], optional=True) or 2 self.d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or self.expand * self.d_model self.n_group = self.find_hparam(["n_groups"], optional=True) or 1 From 
44c5f63b8c6bf5b5805121503472338d539c5bc4 Mon Sep 17 00:00:00 2001 From: limloop Date: Fri, 15 May 2026 21:42:15 +0300 Subject: [PATCH 4/5] mamba2: apply expand fix to refactored conversion/mamba.py --- conversion/mamba.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/conversion/mamba.py b/conversion/mamba.py index be0e36a29bad..d13a4135b97a 100644 --- a/conversion/mamba.py +++ b/conversion/mamba.py @@ -114,7 +114,8 @@ def __init__(self, dir_model: Path, *args, **kwargs): hparams["text_config"] = hparams["llm_config"] super().__init__(dir_model, *args, hparams=hparams, **kwargs) self.d_model = self.find_hparam(["hidden_size", "d_model", "dim"]) - self.d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or 2 * self.d_model + self.expand = self.find_hparam(["expand"], optional=True) or 2 + self.d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or self.expand * self.d_model self.n_group = self.find_hparam(["n_groups"], optional=True) or 1 def set_vocab(self): @@ -144,11 +145,9 @@ def set_gguf_parameters(self): rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5 - # Fail early for models which don't have a block expansion factor of 2 - # TODO: does this really matter? 
# skip the assertion for FalconH1 Model if self.model_arch != gguf.MODEL_ARCH.FALCON_H1: - assert self.d_inner == 2 * self.d_model + assert self.d_inner == self.expand * self.d_model assert self.d_inner % head_dim == 0 self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default From ea081b503ce568b92a936b4c12c9f94d9955534a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= <1629204+CISC@users.noreply.github.com> Date: Thu, 25 Jun 2026 20:59:16 +0200 Subject: [PATCH 5/5] also check for mamba_expand --- conversion/mamba.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conversion/mamba.py b/conversion/mamba.py index d13a4135b97a..43d559ffb0ae 100644 --- a/conversion/mamba.py +++ b/conversion/mamba.py @@ -114,7 +114,7 @@ def __init__(self, dir_model: Path, *args, **kwargs): hparams["text_config"] = hparams["llm_config"] super().__init__(dir_model, *args, hparams=hparams, **kwargs) self.d_model = self.find_hparam(["hidden_size", "d_model", "dim"]) - self.expand = self.find_hparam(["expand"], optional=True) or 2 + self.expand = self.find_hparam(["mamba_expand", "expand"], optional=True) or 2 self.d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or self.expand * self.d_model self.n_group = self.find_hparam(["n_groups"], optional=True) or 1