[misc] add torch.compile compatibility check (#10618)

Signed-off-by: youkaichao <youkaichao@gmail.com>

[misc] add torch.compile compatibility check (#10618)
Signed-off-by: youkaichao <youkaichao@gmail.com>
25d806e9 · youkaichao · GitHub · 65813781 · 25d806e9 · 25d806e9
Unverified commit 25d806e9 · authored Nov 24, 2024 by youkaichao · committed by GitHub Nov 24, 2024
Showing 3 changed files with 22 additions and 1 deletion

tests/v1/engine/test_engine_core_client.py +1 -1

vllm/config.py +14 -0

vllm/engine/arg_utils.py +7 -0

No files found.
--- a/tests/v1/engine/test_engine_core_client.py
+++ b/tests/v1/engine/test_engine_core_client.py
@@ -81,7 +81,7 @@ def test_engine_core_client(monkeypatch, multiprocessing_mode: bool):
    with monkeypatch.context() as m:
        m.setenv("VLLM_USE_V1", "1")

-        engine_args = EngineArgs(model=MODEL_NAME)
+        engine_args = EngineArgs(model=MODEL_NAME, compilation_config=3)
        vllm_config = engine_args.create_engine_config()
        executor_class = AsyncLLM._get_executor_cls(vllm_config)
        client = EngineCoreClient.make_client(

--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2394,6 +2394,20 @@ class VllmConfig:
            self.compilation_config.pass_config.enable_reshape = False
            self.compilation_config.level = CompilationLevel.PIECEWISE

+        if self.cache_config is not None and \
+            self.cache_config.cpu_offload_gb > 0 and \
+            self.compilation_config.level != CompilationLevel.NO_COMPILATION:
+            logger.warning(
+                "CPU offload is not supported with `torch.compile` yet."
+                " Disabling `torch.compile`.")
+            self.compilation_config.level = CompilationLevel.NO_COMPILATION
+
+        if self.lora_config is not None and self.compilation_config.level !=\
+             CompilationLevel.NO_COMPILATION:
+            logger.warning("LoRA is not supported with `torch.compile` yet. "
+                           "Disabling `torch.compile`.")
+            self.compilation_config.level = CompilationLevel.NO_COMPILATION
+
        current_platform.check_and_update_config(self)

    def __str__(self):

--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -197,6 +197,13 @@ class EngineArgs:
        if not self.tokenizer:
            self.tokenizer = self.model

+        # support `EngineArgs(compilation_config={...})`
+        # without having to manually construct a
+        # CompilationConfig object
+        if isinstance(self.compilation_config, (int, dict)):
+            self.compilation_config = CompilationConfig.from_cli(
+                json.dumps(self.compilation_config))
+
        # Setup plugins
        from vllm.plugins import load_general_plugins
        load_general_plugins()