Unverified commit 87789c83, authored by Richard Zou, committed by GitHub
Browse files

[Misc] vLLM's --enforce-eager should turn off compile and cudagraphs only (#34523)


Signed-off-by: Richard Zou <zou3519@gmail.com>
parent bcd65c1f
......@@ -728,13 +728,13 @@ class VllmConfig:
"precision for chunked prefill triton kernels."
)
if (
self.optimization_level > OptimizationLevel.O0
and self.model_config is not None
and self.model_config.enforce_eager
):
logger.warning("Enforce eager set, overriding optimization level to -O0")
self.optimization_level = OptimizationLevel.O0
if self.model_config is not None and self.model_config.enforce_eager:
logger.warning(
"Enforce eager set, disabling torch.compile and CUDAGraphs. "
"This is equivalent to setting -cc.mode=none -cc.cudagraph_mode=none"
)
self.compilation_config.mode = CompilationMode.NONE
self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE
if self.compilation_config.backend == "eager" or (
self.compilation_config.mode is not None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment