Unverified commit f45870b5, authored by tianshu-Michael-yu, committed by GitHub
Browse files

fix: allow LFM2 MoE prefix caching (align) (#33376)


Signed-off-by: Tianshu Yu <tianshuyu.formal@gmail.com>
parent ba45bedf
......@@ -651,8 +651,10 @@ class Lfm2MoeForCausalLM(
quant_config = vllm_config.quant_config
cache_config = vllm_config.cache_config
assert not cache_config.enable_prefix_caching, (
"Lfm2Moe currently does not support prefix caching"
if cache_config.mamba_cache_mode == "all":
raise NotImplementedError(
"Lfm2Moe currently does not support 'all' prefix caching, "
"please use '--mamba-cache-mode=align' instead"
)
super().__init__()
......
......@@ -22,6 +22,8 @@ from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.forward_context import set_forward_context
from vllm.model_executor.layers.mamba.mamba_utils import (
MambaStateCopyFunc,
MambaStateCopyFuncCalculator,
MambaStateDtypeCalculator,
MambaStateShapeCalculator,
)
......@@ -584,6 +586,10 @@ class Lfm2VLForConditionalGeneration(
conv_kernel=hf_language_config.conv_L_cache,
)
@classmethod
def get_mamba_state_copy_func(cls) -> tuple[MambaStateCopyFunc]:
    """Return the Mamba state-copy function tuple for this model class.

    Delegates entirely to
    ``MambaStateCopyFuncCalculator.short_conv_state_copy_func`` and returns
    its result unchanged; ``cls`` is not consulted.
    """
    # NOTE(review): presumably this hook is what lets the cache layer copy
    # short-conv state for prefix caching -- confirm against the caller.
    copy_funcs = MambaStateCopyFuncCalculator.short_conv_state_copy_func()
    return copy_funcs
def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
super().__init__()
config: Lfm2VlConfig = vllm_config.model_config.hf_config
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment