"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "a3205beffb6b3d2923fd9ad8e1ef8b4fd5f7ed29"
Unverified commit f819265a, authored by roikoren755, committed by GitHub
Browse files

Default to 'align' mamba cache mode for Mamba-based models when speculative...


Default to 'align' mamba cache mode for Mamba-based models when speculative decoding is enabled (#40454)
Signed-off-by: Roi Koren <roik@nvidia.com>
parent 936e0b79
...@@ -325,15 +325,26 @@ class MambaModelConfig(VerifyAndUpdateConfig): ...@@ -325,15 +325,26 @@ class MambaModelConfig(VerifyAndUpdateConfig):
if cache_config.enable_prefix_caching: if cache_config.enable_prefix_caching:
if cache_config.mamba_cache_mode == "none": if cache_config.mamba_cache_mode == "none":
cache_config.mamba_cache_mode = ( if (
"all" if model_config.supports_mamba_prefix_caching else "align" model_config.supports_mamba_prefix_caching
) and vllm_config.speculative_config is not None
logger.warning( ):
"Mamba cache mode is set to '%s' for %s by default " cache_config.mamba_cache_mode = "align"
"when prefix caching is enabled", logger.warning(
cache_config.mamba_cache_mode, "Mamba cache mode is set to 'align' for %s by default "
model_config.architecture, "when prefix caching and speculative decoding are enabled",
) model_config.architecture,
)
else:
cache_config.mamba_cache_mode = (
"all" if model_config.supports_mamba_prefix_caching else "align"
)
logger.warning(
"Mamba cache mode is set to '%s' for %s by default "
"when prefix caching is enabled",
cache_config.mamba_cache_mode,
model_config.architecture,
)
if ( if (
cache_config.mamba_cache_mode == "all" cache_config.mamba_cache_mode == "all"
and not model_config.supports_mamba_prefix_caching and not model_config.supports_mamba_prefix_caching
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment