Unverified commit d1336018, authored by Chenxi Yang and committed by GitHub
Browse files

Remove redundant all gather + split (#23441)


Co-authored-by: Chenxi Yang <cxyang@meta.com>
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
parent 9bd831f5
......@@ -272,23 +272,10 @@ class Glm4vVisionAttention(nn.Module):
def split_qkv(self, qkv: torch.Tensor) -> tuple[torch.Tensor, ...]:
# [s, b, 3 * head * head_dim]
seq_len, bs, _ = qkv.shape
if self.tp_size > 1:
qkv = all_gather_interleave(qkv, self.qkv.hidden_size,
self.tp_size)
# [s, b, 3 * head * head_dim] -> 3 * [s, b, head * head_dim]
q, k, v = qkv.chunk(3, dim=2)
# 3 * [s, b, head * head_dim]
if self.tp_size > 1:
splitter = partial(
dist_utils.split_tensor_along_last_dim,
num_partitions=self.tp_size,
)
q = splitter(q)[self.tp_rank]
k = splitter(k)[self.tp_rank]
v = splitter(v)[self.tp_rank]
# 3 * [s, b, head * head_dim] -> 3 * [s, b, head, head_dim]
new_shape = (
seq_len,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment