Commit 506ad7d7 (unverified), authored by J Seppänen, committed by GitHub
Browse files

[Bugfix] Fix weights offloading for sleep mode (#32947)


Signed-off-by: Jarno Seppänen <jseppanen@nvidia.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
parent fdd6f2ad
@@ -286,9 +286,10 @@ class Worker(WorkerBase):
     # to hijack tensor allocation.
     def load_model(self) -> None:
         eep_scale_up = os.environ.get("VLLM_ELASTIC_EP_SCALE_UP_LAUNCH") == "1"
-        with self._maybe_get_memory_pool_context(
-            tag="weights"
-        ) and set_current_vllm_config(self.vllm_config):
+        with (
+            self._maybe_get_memory_pool_context(tag="weights"),
+            set_current_vllm_config(self.vllm_config),
+        ):
             self.model_runner.load_model(eep_scale_up=eep_scale_up)
 
     def update_config(self, overrides: dict[str, Any]) -> None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment