Commit d8b9028d authored by zhuwenwen's avatar zhuwenwen
Browse files

set self.max_seq_len_to_capture = self.max_model_len

parent 4fd5389b
...@@ -15,7 +15,7 @@ vLLM是一个快速且易于使用的LLM推理和服务库,使用PageAttention ...@@ -15,7 +15,7 @@ vLLM是一个快速且易于使用的LLM推理和服务库,使用PageAttention
| Llama4ForConditionalGeneration | Llama 4 | No/Yes | - | - | v0.8.5.post1 | No | | Llama4ForConditionalGeneration | Llama 4 | No/Yes | - | - | v0.8.5.post1 | No |
| QWenLMHeadModel | QWen,Qwen-VL | Yes | Yes | Yes | v0.5.0,Qwen-VL>=v0.6.2 | Yes | | QWenLMHeadModel | QWen,Qwen-VL | Yes | Yes | Yes | v0.5.0,Qwen-VL>=v0.6.2 | Yes |
| Qwen2ForCausalLM | QWen2,QWen1.5,CodeQwen1.5,DeepSeek-R1-Distill-Qwen,gte_Qwen2-1.5B-instruct | Yes | Yes | Yes | v0.5.0,gte>=v0.7.2 | Yes | | Qwen2ForCausalLM | QWen2,QWen1.5,CodeQwen1.5,DeepSeek-R1-Distill-Qwen,gte_Qwen2-1.5B-instruct | Yes | Yes | Yes | v0.5.0,gte>=v0.7.2 | Yes |
| Qwen3ForCausalLM | QWen3,Qwen3-Embedding,Qwen3-Reranker | Yes | - | - | v0.8.4 | Yes | | Qwen3ForCausalLM | QWen3,Qwen3-Embedding | Yes | - | - | v0.8.4 | Yes |
| Qwen3MoeForCausalLM | QWen3MoE | Yes | - | - | v0.8.4 | Yes | | Qwen3MoeForCausalLM | QWen3MoE | Yes | - | - | v0.8.4 | Yes |
| ChatGLMModel | glm-4v-9b,chatglm3,chatglm2 | Yes | No | Yes | v0.5.0 | Yes | | ChatGLMModel | glm-4v-9b,chatglm3,chatglm2 | Yes | No | Yes | v0.5.0 | Yes |
| Glm4ForCausalLM | GLM-4-0414 | No/Yes | - | - | v0.8.5.post1 | Yes | | Glm4ForCausalLM | GLM-4-0414 | No/Yes | - | - | v0.8.5.post1 | Yes |
......
...@@ -836,8 +836,9 @@ class ModelConfig: ...@@ -836,8 +836,9 @@ class ModelConfig:
def _verify_cuda_graph(self) -> None: def _verify_cuda_graph(self) -> None:
if self.max_seq_len_to_capture is None: if self.max_seq_len_to_capture is None:
self.max_seq_len_to_capture = self.max_model_len self.max_seq_len_to_capture = self.max_model_len
self.max_seq_len_to_capture = min(self.max_seq_len_to_capture, # self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
self.max_model_len) # self.max_model_len)
self.max_seq_len_to_capture = self.max_model_len
ROCM_UNSUPPORTED_MODELS = ['mllama'] ROCM_UNSUPPORTED_MODELS = ['mllama']
if (self.hf_config.model_type in ROCM_UNSUPPORTED_MODELS if (self.hf_config.model_type in ROCM_UNSUPPORTED_MODELS
and not self.enforce_eager and current_platform.is_rocm()): and not self.enforce_eager and current_platform.is_rocm()):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment