Commit e606d4b2 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge branch 'v0.7.2-fusion-wm' into 'v0.7.2-fusion'

[fix]修复cudagraph和eager分段模式下,开启mla后报错问题

See merge request dcutoolkit/deeplearing/vllm!109
parents f98420b4 e0c80c12
...@@ -892,6 +892,10 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]): ...@@ -892,6 +892,10 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
max_encoder_seq_len=max_encoder_seq_len) max_encoder_seq_len=max_encoder_seq_len)
batch_size = len(input_tokens) batch_size = len(input_tokens)
if batch_size + cuda_graph_pad_size >= self.runner.enforce_eager_bs_threshould:
cuda_graph_pad_size = -1
if cuda_graph_pad_size != -1: if cuda_graph_pad_size != -1:
# If cuda graph can be used, pad tensors accordingly. # If cuda graph can be used, pad tensors accordingly.
# See `capture_model` API for more details. # See `capture_model` API for more details.
...@@ -1709,7 +1713,7 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]): ...@@ -1709,7 +1713,7 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
# virtual engines share the same kv cache. # virtual engines share the same kv cache.
virtual_engine = model_input.virtual_engine virtual_engine = model_input.virtual_engine
if prefill_meta is None and decode_meta.use_cuda_graph and \ if prefill_meta is None and decode_meta.use_cuda_graph and \
model_input.input_tokens.shape[0] <= self.enforce_eager_bs_threshould: model_input.input_tokens.shape[0] < self.enforce_eager_bs_threshould:
assert model_input.input_tokens is not None assert model_input.input_tokens is not None
graph_batch_size = model_input.input_tokens.shape[0] graph_batch_size = model_input.input_tokens.shape[0]
model_executable = self.graph_runners[virtual_engine][ model_executable = self.graph_runners[virtual_engine][
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment