Unverified commit 04024282, authored by Lehua Ding, committed by GitHub
Browse files

[Perf][Async Scheduling] Remove CPU->GPU sync in dummy_run (#27455)


Signed-off-by: Lehua Ding <lehuading@tencent.com>
parent 17af6aa0
......@@ -3492,7 +3492,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
self.eplb_step(is_dummy=True, is_profile=is_profile)
logit_indices = np.cumsum(num_scheduled_tokens) - 1
-return hidden_states, hidden_states[logit_indices]
+logit_indices_device = torch.from_numpy(logit_indices).to(
+self.device, non_blocking=True
+)
+return hidden_states, hidden_states[logit_indices_device]
@torch.inference_mode()
def _dummy_sampler_run(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment