[torch.compile] add dynamo time tracking (#11005)

Signed-off-by: youkaichao <youkaichao@gmail.com>

[torch.compile] add dynamo time tracking (#11005)
Signed-off-by: youkaichao <youkaichao@gmail.com>
d1c2e15e · youkaichao · GitHub · af7c4a92 · d1c2e15e · d1c2e15e
Unverified commit d1c2e15e, authored Dec 08, 2024 by youkaichao; committed by GitHub on Dec 08, 2024.
Showing 3 changed files with 16 additions and 5 deletions

vllm/compilation/backends.py +6 -0

vllm/compilation/decorators.py +3 -3

vllm/compilation/monitor.py +7 -2

No files found.
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -265,7 +265,13 @@ class VllmBackend:
    def __call__(self, graph: fx.GraphModule, example_inputs) -> Callable:
+        # when dynamo calls the backend, it means the bytecode
+        # transform and analysis are done
        compilation_counter.num_graphs_seen += 1
+        from .monitor import torch_compile_start_time
+        dynamo_time = time.time() - torch_compile_start_time
+        logger.info("Dynamo bytecode transform time: %.2f s", dynamo_time)
+        self.compilation_configs.compilation_time += dynamo_time
        # we control the compilation process, each instance can only be
        # called once

--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -145,6 +145,7 @@ def _support_torch_compile(
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = '', **kwargs):
        old_init(self, vllm_config=vllm_config, prefix=prefix, **kwargs)
+        self.vllm_config = vllm_config
        # for CompilationLevel.DYNAMO_AS_IS , the upper level model runner
        # will handle the compilation, so we don't need to do anything here.
        self.do_not_compile = \
@@ -157,9 +158,6 @@ def _support_torch_compile(
        TorchCompileWrapperWithCustomDispatcher.__init__(
            self, compilation_level=vllm_config.compilation_config.level)
-        if vllm_config.compilation_config.level == CompilationLevel.PIECEWISE:
-            start_monitoring_torch_compile(vllm_config.compilation_config)
    cls.__init__ = __init__
    def __call__(self, *args, **kwargs):
@@ -186,6 +184,8 @@ def _support_torch_compile(
                        raise ValueError(
                            "Unsupported dynamic dimensions"
                            f" {dims} for argument {k} with type {type(arg)}.")
+            # here, it is the starting point of the `torch.compile` process
+            start_monitoring_torch_compile(self.vllm_config.compilation_config)
        # if we don't use custom dispatcher, we can directly call the
        # compiled function and let torch.compile handle the dispatching,

--- a/vllm/compilation/monitor.py
+++ b/vllm/compilation/monitor.py
+import time
 from vllm.config import CompilationConfig, CompilationLevel
 from vllm.logger import init_logger
 logger = init_logger(__name__)
+torch_compile_start_time: float = 0.0
 def start_monitoring_torch_compile(compilation_config: CompilationConfig):
-    pass
+    global torch_compile_start_time
+    torch_compile_start_time = time.time()
 def end_monitoring_torch_compile(compilation_config: CompilationConfig):
    if compilation_config.level == CompilationLevel.PIECEWISE:
-        logger.info("graph compilation takes %.2f s in total",
+        logger.info("torch.compile takes %.2f s in total",
                    compilation_config.compilation_time)