fix: allow LFM2 MoE prefix caching (align) (#33376)

Signed-off-by: Tianshu Yu <tianshuyu.formal@gmail.com>

fix: allow LFM2 MoE prefix caching (align) (#33376)
Signed-off-by: Tianshu Yu <tianshuyu.formal@gmail.com>
f45870b5 · tianshu-Michael-yu · GitHub · ba45bedf · f45870b5 · f45870b5
Unverified Commit f45870b5 authored Jan 30, 2026 by tianshu-Michael-yu Committed by GitHub Jan 30, 2026
Show whitespace changes
Inline Side-by-side

Showing with 11 additions and 3 deletions

vllm/model_executor/models/lfm2_moe.py vllm/model_executor/models/lfm2_moe.py +5 -3

vllm/model_executor/models/lfm2_vl.py vllm/model_executor/models/lfm2_vl.py +6 -0

No files found.
--- a/vllm/model_executor/models/lfm2_moe.py
+++ b/vllm/model_executor/models/lfm2_moe.py
@@ -651,8 +651,10 @@ class Lfm2MoeForCausalLM(
        quant_config = vllm_config.quant_config
        cache_config = vllm_config.cache_config

-        assert not cache_config.enable_prefix_caching, (
-            "Lfm2Moe currently does not support prefix caching"
+        if cache_config.mamba_cache_mode == "all":
+            raise NotImplementedError(
+                "Lfm2Moe currently does not support 'all' prefix caching, "
+                "please use '--mamba-cache-mode=align' instead"
-        )
+            )

        super().__init__()

--- a/vllm/model_executor/models/lfm2_vl.py
+++ b/vllm/model_executor/models/lfm2_vl.py
@@ -22,6 +22,8 @@ from vllm.config import VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
 from vllm.forward_context import set_forward_context
 from vllm.model_executor.layers.mamba.mamba_utils import (
+    MambaStateCopyFunc,
+    MambaStateCopyFuncCalculator,
    MambaStateDtypeCalculator,
    MambaStateShapeCalculator,
 )
@@ -584,6 +586,10 @@ class Lfm2VLForConditionalGeneration(
            conv_kernel=hf_language_config.conv_L_cache,
        )

+    @classmethod
+    def get_mamba_state_copy_func(cls) -> tuple[MambaStateCopyFunc]:
+        return MambaStateCopyFuncCalculator.short_conv_state_copy_func()
+
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
        super().__init__()
        config: Lfm2VlConfig = vllm_config.model_config.hf_config