"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "09ad3b76b320fffcb6b0214bd90851c3328581ea"
Unverified Commit 25bb9e8c authored by wang.yuqi, committed by GitHub
Browse files

[CI Failure] fix models/language/pooling/test_auto_prefix_cache_support.py (#24636)


Signed-off-by: wang.yuqi <noooop@126.com>
parent a1213fae
...@@ -3558,6 +3558,10 @@ class VllmConfig: ...@@ -3558,6 +3558,10 @@ class VllmConfig:
disable_chunked_prefill_reasons.append( disable_chunked_prefill_reasons.append(
"Only \"last\" pooling supports chunked " "Only \"last\" pooling supports chunked "
"prefill and prefix caching; disabling both.") "prefill and prefix caching; disabling both.")
if not getattr(self.model_config.hf_config, "is_causal", True):
disable_chunked_prefill_reasons.append(
"Only models using causal attention supports chunked "
"prefill and prefix caching; disabling both.")
elif self.model_config.is_encoder_decoder: elif self.model_config.is_encoder_decoder:
self.scheduler_config.max_num_encoder_input_tokens = \ self.scheduler_config.max_num_encoder_input_tokens = \
MULTIMODAL_REGISTRY.get_encdec_max_encoder_len(self.model_config) MULTIMODAL_REGISTRY.get_encdec_max_encoder_len(self.model_config)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment