Unverified commit f9e6db30, authored by Lukas Geiger, committed by GitHub
Browse files

[Models][Qwen3 ViT] Keep `max_seqlen` on CPU to prevent D2H sync (#37139)


Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent d61d2b08
......@@ -557,7 +557,6 @@ class Qwen3_VisionTransformer(nn.Module):
max_seqlen = torch.tensor(
MMEncoderAttention.compute_max_seqlen(self.attn_backend, cu_seqlens),
dtype=torch.int32,
device=self.device,
)
cu_seqlens = MMEncoderAttention.maybe_recompute_cu_seqlens(
self.attn_backend,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment