Unverified commit be740acd, authored by Vincent Zhong and committed via GitHub
Browse files

[smol] [perf] Qwen3-VL in place op. (#11481)


Signed-off-by: vincentzed <207368749+vincentzed@users.noreply.github.com>
parent 2db2cddd
...@@ -189,10 +189,10 @@ class Qwen3_VisionBlock(nn.Module): ...@@ -189,10 +189,10 @@ class Qwen3_VisionBlock(nn.Module):
position_embeddings=position_embeddings, position_embeddings=position_embeddings,
) )
attn = rearrange(attn, "b s ... -> s b ...") attn = rearrange(attn, "b s ... -> s b ...")
x = x + attn x += attn
norm2 = self.norm2(x) norm2 = self.norm2(x)
mlp = self.mlp(norm2) mlp = self.mlp(norm2)
x = x + mlp x += mlp
return x return x
...@@ -441,7 +441,7 @@ class Qwen3_VisionTransformer(nn.Module): ...@@ -441,7 +441,7 @@ class Qwen3_VisionTransformer(nn.Module):
x = self.patch_embed(x) x = self.patch_embed(x)
pos_embeds = self.fast_pos_embed_interpolate(grid_thw) pos_embeds = self.fast_pos_embed_interpolate(grid_thw)
x = x + pos_embeds x += pos_embeds
rotary_pos_emb = self.rot_pos_emb(grid_thw) rotary_pos_emb = self.rot_pos_emb(grid_thw)
seq_len, _ = x.size() seq_len, _ = x.size()
...@@ -574,10 +574,7 @@ class Qwen3LLMModel(Qwen3Model): ...@@ -574,10 +574,7 @@ class Qwen3LLMModel(Qwen3Model):
and layer_idx in self.deepstack_embed_to_decoder_layer and layer_idx in self.deepstack_embed_to_decoder_layer
): ):
sep = self.hidden_size * layer_idx sep = self.hidden_size * layer_idx
hidden_states = ( hidden_states += input_deepstack_embeds[:, sep : sep + self.hidden_size]
hidden_states
+ input_deepstack_embeds[:, sep : sep + self.hidden_size]
)
if not self.pp_group.is_last_rank: if not self.pp_group.is_last_rank:
return PPProxyTensors( return PPProxyTensors(
......
...@@ -114,7 +114,7 @@ class Qwen3MoeLLMModel(Qwen3MoeModel): ...@@ -114,7 +114,7 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
for layer_idx, layer in enumerate( for layer_idx, layer in enumerate(
self.layers[self.start_layer : self.end_layer] self.layers[self.start_layer : self.end_layer]
): ):
layer_idx = layer_idx + self.start_layer layer_idx += self.start_layer
if layer_idx in self.layers_to_capture: if layer_idx in self.layers_to_capture:
aux_hidden_states.append( aux_hidden_states.append(
hidden_states + residual if residual is not None else hidden_states hidden_states + residual if residual is not None else hidden_states
...@@ -130,9 +130,8 @@ class Qwen3MoeLLMModel(Qwen3MoeModel): ...@@ -130,9 +130,8 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
# process deepstack # process deepstack
if input_deepstack_embeds is not None and layer_idx in range(3): if input_deepstack_embeds is not None and layer_idx in range(3):
sep = self.hidden_size * layer_idx sep = self.hidden_size * layer_idx
hidden_states = ( hidden_states.add_(
hidden_states input_deepstack_embeds[:, sep : sep + self.hidden_size]
+ input_deepstack_embeds[:, sep : sep + self.hidden_size]
) )
if not self.pp_group.is_last_rank: if not self.pp_group.is_last_rank:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.