Unverified commit b91d8db8, authored by Jaya Yuan, committed by GitHub
Browse files

[Bugfix][DCP] Set default CUDAGraphMode to PIECEWISE for DCP (#26574)


Signed-off-by: FENP <32334296+FENP@users.noreply.github.com>
parent 045b396d
@@ -350,6 +350,15 @@ class VllmConfig:
or self.model_config.is_encoder_decoder
):
self.compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
# Decode context parallel (DCP) does not support full CUDA graphs yet.
if self.parallel_config.decode_context_parallel_size > 1:
logger.warning(
"Decode context parallel (DCP) is enabled, which is "
"incompatible with full CUDA graphs. Set "
"cudagraph_mode to PIECEWISE."
)
self.compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
else:
self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment