Unverified commit 05dc4bfa, authored by Woosuk Kwon and committed by GitHub
Browse files

[Model Runner V2] Initialized communication buffer for DP (#32624)


Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent 1a1fc3bb
...@@ -12,6 +12,7 @@ import torch.nn as nn ...@@ -12,6 +12,7 @@ import torch.nn as nn
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.config.compilation import CUDAGraphMode from vllm.config.compilation import CUDAGraphMode
from vllm.distributed.parallel_state import prepare_communication_buffer_for_model
from vllm.forward_context import set_forward_context from vllm.forward_context import set_forward_context
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.model_loader import get_model_loader from vllm.model_executor.model_loader import get_model_loader
...@@ -206,6 +207,12 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -206,6 +207,12 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
time_after_load - time_before_load, time_after_load - time_before_load,
) )
prepare_communication_buffer_for_model(self.model)
if self.do_spec_decode:
speculator_model = getattr(self.speculator, "model", None)
if speculator_model is not None:
prepare_communication_buffer_for_model(speculator_model)
def get_model(self) -> nn.Module: def get_model(self) -> nn.Module:
return self.model return self.model
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment