"tests/vscode:/vscode.git/clone" did not exist on "eec906d8114cd786315e49ab7f5a3093d1896880"
Commit 72273242 authored by zhuwenwen
Browse files

update max_seq_len_to_capture

parent 267cc5ff
...@@ -313,7 +313,8 @@ class ModelConfig: ...@@ -313,7 +313,8 @@ class ModelConfig:
graph and always execute the model in eager mode. If False, we will use graph and always execute the model in eager mode. If False, we will use
CUDA graph and eager execution in hybrid for maximal performance and CUDA graph and eager execution in hybrid for maximal performance and
flexibility.""" flexibility."""
max_seq_len_to_capture: int = 8192 # max_seq_len_to_capture: int = 8192
max_seq_len_to_capture: bool = None
"""Maximum sequence len covered by CUDA graphs. When a sequence has context """Maximum sequence len covered by CUDA graphs. When a sequence has context
length larger than this, we fall back to eager mode. Additionally for length larger than this, we fall back to eager mode. Additionally for
encoder-decoder models, if the sequence length of the encoder input is encoder-decoder models, if the sequence length of the encoder input is
...@@ -973,9 +974,11 @@ class ModelConfig: ...@@ -973,9 +974,11 @@ class ModelConfig:
"non-quantized models.", self.quantization) "non-quantized models.", self.quantization)
def _verify_cuda_graph(self) -> None: def _verify_cuda_graph(self) -> None:
# self.max_seq_len_to_capture = min(self.max_seq_len_to_capture, if self.max_seq_len_to_capture is None:
# self.max_model_len)
self.max_seq_len_to_capture = self.max_model_len self.max_seq_len_to_capture = self.max_model_len
self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
self.max_model_len)
# self.max_seq_len_to_capture = self.max_model_len
# CUDAGraph capture not supported for enc-dec models and mllama on ROCm # CUDAGraph capture not supported for enc-dec models and mllama on ROCm
ROCM_UNSUPPORTED_MODELS = ['mllama'] ROCM_UNSUPPORTED_MODELS = ['mllama']
unsupported_rocm = (self.hf_config.model_type unsupported_rocm = (self.hf_config.model_type
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment