Unverified commit 0a4806f0, authored by youkaichao, committed by GitHub
Browse files

[plugin][torch.compile] allow to add custom compile backend (#8445)

parent ecd7a1d5
import logging import logging
from typing import Callable, Optional, Union
import vllm.envs as envs import vllm.envs as envs
...@@ -29,3 +30,15 @@ def load_general_plugins(): ...@@ -29,3 +30,15 @@ def load_general_plugins():
except Exception: except Exception:
logger.exception("Failed to load general plugin: %s", logger.exception("Failed to load general plugin: %s",
plugin.name) plugin.name)
# Module-level override for the ``backend`` argument passed to
# ``torch.compile``. ``None`` means no override has been registered.
_torch_compile_backend: Optional[Union[Callable, str]] = None


def set_torch_compile_backend(
        backend: Optional[Union[Callable, str]]) -> None:
    """Register a custom backend to be used with ``torch.compile``.

    Args:
        backend: Either the name of a backend (``str``) or a backend
            callable. Passing ``None`` clears a previously registered
            override.

    Raises:
        TypeError: If ``backend`` is neither a string, a callable, nor
            ``None``. Validating here reports the mistake at registration
            time instead of later, when the value is handed to
            ``torch.compile``.
    """
    global _torch_compile_backend
    is_valid = (backend is None or isinstance(backend, str)
                or callable(backend))
    if not is_valid:
        raise TypeError(
            "torch.compile backend must be a str or a callable, got "
            f"{type(backend).__name__}")
    _torch_compile_backend = backend


def get_torch_compile_backend() -> Optional[Union[Callable, str]]:
    """Return the registered ``torch.compile`` backend override.

    Returns:
        The value last passed to ``set_torch_compile_backend``, or ``None``
        if nothing has been registered; callers choose their own default in
        that case.
    """
    return _torch_compile_backend
...@@ -1064,10 +1064,12 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]): ...@@ -1064,10 +1064,12 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
"This may lead to less accurate results!") "This may lead to less accurate results!")
if envs.VLLM_TEST_DYNAMO_GRAPH_CAPTURE and supports_dynamo(): if envs.VLLM_TEST_DYNAMO_GRAPH_CAPTURE and supports_dynamo():
from vllm.plugins import get_torch_compile_backend
backend = get_torch_compile_backend() or "eager"
self.model = torch.compile( self.model = torch.compile(
self.model, self.model,
fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE, fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
backend="eager") backend=backend)
def save_sharded_state( def save_sharded_state(
self, self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment