Unverified commit 96b9aa5a, authored by Morrison Turnansky, committed by GitHub
Browse files

[Frontend][torch.compile] CompilationConfig Overhaul (#20283): name change ...


[Frontend][torch.compile] CompilationConfig Overhaul (#20283): rename compilation level to compilation mode, deprecate compilation level (#26355)
Signed-off-by: morrison-turnansky <mturnans@redhat.com>
Signed-off-by: Morrison Turnansky <mturnans@redhat.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
parent e66d787b
...@@ -9,7 +9,7 @@ import torch ...@@ -9,7 +9,7 @@ import torch
import torch.nn as nn import torch.nn as nn
from vllm.config import ( from vllm.config import (
CompilationLevel, CompilationMode,
CUDAGraphMode, CUDAGraphMode,
VllmConfig, VllmConfig,
get_layers_from_vllm_config, get_layers_from_vllm_config,
...@@ -86,7 +86,7 @@ class EagleProposer: ...@@ -86,7 +86,7 @@ class EagleProposer:
self.use_cuda_graph = False self.use_cuda_graph = False
compilation_config = self.vllm_config.compilation_config compilation_config = self.vllm_config.compilation_config
if compilation_config.level == CompilationLevel.PIECEWISE: if compilation_config.mode == CompilationMode.VLLM_COMPILE:
cudagraph_mode = compilation_config.cudagraph_mode cudagraph_mode = compilation_config.cudagraph_mode
if cudagraph_mode != CUDAGraphMode.NONE and not cudagraph_mode.has_mode( if cudagraph_mode != CUDAGraphMode.NONE and not cudagraph_mode.has_mode(
CUDAGraphMode.PIECEWISE CUDAGraphMode.PIECEWISE
......
...@@ -25,7 +25,7 @@ from vllm.compilation.counter import compilation_counter ...@@ -25,7 +25,7 @@ from vllm.compilation.counter import compilation_counter
from vllm.compilation.cuda_graph import CUDAGraphWrapper from vllm.compilation.cuda_graph import CUDAGraphWrapper
from vllm.compilation.monitor import set_cudagraph_capturing_enabled from vllm.compilation.monitor import set_cudagraph_capturing_enabled
from vllm.config import ( from vllm.config import (
CompilationLevel, CompilationMode,
CUDAGraphMode, CUDAGraphMode,
VllmConfig, VllmConfig,
get_layers_from_vllm_config, get_layers_from_vllm_config,
...@@ -2927,14 +2927,15 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -2927,14 +2927,15 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
) )
if ( if (
self.vllm_config.compilation_config.level == CompilationLevel.DYNAMO_AS_IS self.vllm_config.compilation_config.mode
== CompilationMode.STOCK_TORCH_COMPILE
and supports_dynamo() and supports_dynamo()
): ):
backend = self.vllm_config.compilation_config.init_backend(self.vllm_config) backend = self.vllm_config.compilation_config.init_backend(self.vllm_config)
compilation_counter.dynamo_as_is_count += 1 compilation_counter.stock_torch_compile_count += 1
self.model.compile(fullgraph=True, backend=backend) self.model.compile(fullgraph=True, backend=backend)
return return
# for other compilation levels, cudagraph behavior is controlled by # for other compilation modes, cudagraph behavior is controlled by
# CudagraphWraper and CudagraphDispatcher of vllm. # CudagraphWraper and CudagraphDispatcher of vllm.
# wrap the model with full cudagraph wrapper if needed. # wrap the model with full cudagraph wrapper if needed.
...@@ -3985,7 +3986,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -3985,7 +3986,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
# if not supported any full cudagraphs, just raise it. # if not supported any full cudagraphs, just raise it.
msg += ( msg += (
"; please try cudagraph_mode=PIECEWISE, and " "; please try cudagraph_mode=PIECEWISE, and "
"make sure compilation level is piecewise" "make sure compilation mode is VLLM_COMPILE"
) )
raise ValueError(msg) raise ValueError(msg)
...@@ -4012,7 +4013,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -4012,7 +4013,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
f"with {min_cg_builder_name} backend (support: " f"with {min_cg_builder_name} backend (support: "
f"{min_cg_support})" f"{min_cg_support})"
) )
if self.compilation_config.level == CompilationLevel.PIECEWISE and ( if self.compilation_config.mode == CompilationMode.VLLM_COMPILE and (
self.compilation_config.splitting_ops_contain_attention() self.compilation_config.splitting_ops_contain_attention()
or self.compilation_config.use_inductor_graph_partition or self.compilation_config.use_inductor_graph_partition
): ):
...@@ -4068,7 +4069,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -4068,7 +4069,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
f"supported with {min_cg_builder_name} backend (" f"supported with {min_cg_builder_name} backend ("
f"support:{min_cg_support}) " f"support:{min_cg_support}) "
"; please try cudagraph_mode=PIECEWISE, " "; please try cudagraph_mode=PIECEWISE, "
"and make sure compilation level is piecewise" "and make sure compilation mode is VLLM_COMPILE"
) )
# Trigger cudagraph dispatching keys initialization here (after # Trigger cudagraph dispatching keys initialization here (after
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment