Unverified Commit 7655dc3e authored by iAmir97, committed by GitHub
Browse files

[Bugfix] Add reset prefix cache for online serving (#22726)


Signed-off-by: iAmir97 <Amir.balwel@embeddedllm.com>
Signed-off-by: iAmir97 <71513472+iAmir97@users.noreply.github.com>
Co-authored-by: iAmir97 <Amir.balwel@embeddedllm.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent f4efda82
......@@ -1092,6 +1092,7 @@ class AsyncLLMEngine(EngineClient):
self.engine.reset_prefix_cache(device)
# Put self.engine to sleep at the requested level (default 1).
# The meaning of each level is defined by the engine and is not visible here.
async def sleep(self, level: int = 1) -> None:
# The prefix-cache reset is awaited first, so it has finished before the
# engine's sleep is invoked.
await self.reset_prefix_cache()
# NOTE(review): engine.sleep is called directly (not awaited) on this path.
self.engine.sleep(level)
async def wake_up(self, tags: Optional[list[str]] = None) -> None:
......
......@@ -576,6 +576,7 @@ class AsyncLLM(EngineClient):
await self.engine_core.reset_prefix_cache_async()
# Put the engine core to sleep at the requested level (default 1).
# The meaning of each level is defined by the engine core and is not visible here.
async def sleep(self, level: int = 1) -> None:
# The prefix-cache reset is awaited first, so it has finished before the
# sleep request is issued.
await self.reset_prefix_cache()
# Sleep is delegated to the engine core's async entry point and awaited.
await self.engine_core.sleep_async(level)
async def wake_up(self, tags: Optional[list[str]] = None) -> None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment