"vllm/vscode:/vscode.git/clone" did not exist on "de7eb10ce4fba23d204e06ecec4261669fab884a"
Unverified Commit cb3e73e4, authored by fade_away, committed by GitHub
Browse files

[BugFix] fix wrong output when using lora and num_scheduler_steps=8 (#11161)

FIX issue https://github.com/vllm-project/vllm/issues/9688
https://github.com/vllm-project/vllm/issues/11086

 #12487

---------
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: weilong.yu <weilong.yu@shopee.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
parent b1340f9d
......@@ -1346,6 +1346,10 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
self.execute_model(model_input, kv_caches, intermediate_tensors)
torch.cuda.synchronize()
if self.lora_config:
# Remove dummy loras.
assert self.lora_manager is not None
self.remove_all_loras()
return
def remove_all_loras(self):
......
......@@ -264,10 +264,7 @@ class Worker(LocalOrDistributedWorkerBase):
f"{(available_kv_cache_memory / GiB_bytes):.2f}GiB.")
logger.info(msg)
# Final cleanup
if self.model_runner.lora_manager:
self.model_runner.remove_all_loras()
gc.collect()
return num_gpu_blocks, num_cpu_blocks
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment