"vllm/vscode:/vscode.git/clone" did not exist on "9a3f49ae07f9627db02b2ee377accb76b65d1d1e"
Unverified commit 8ccbcda5, authored by Woosuk Kwon and committed by GitHub
Browse files

[Model Runner V2] Remove unused warmup_for_prefill method (#36762)


Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
parent a9e532af
......@@ -532,13 +532,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
)
return cuda_graph_size
def warmup_for_prefill(self) -> None:
    """Run one dummy prefill pass when every attention backend is FlashInfer.

    The dummy run exists to trigger FlashInfer's JIT compilation ahead of
    real requests. If any backend's name lacks "FLASHINFER", this method
    does nothing.
    """
    # Bail out at the first backend that is not FlashInfer.
    for backend in self.attn_backends.values():
        if "FLASHINFER" not in backend.get_name():
            return

    # Full-size dummy prefill with attention enabled.
    self._dummy_run(self.max_num_tokens, skip_attn=False)
    # Block until the work queued by the dummy run has finished.
    torch.accelerator.synchronize()
def finish_requests(self, scheduler_output: SchedulerOutput) -> None:
finished_req_ids = scheduler_output.finished_req_ids
preempted_req_ids = scheduler_output.preempted_req_ids
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment