[Bugfix] Fix embedding to support 2D inputs (#5829)

6806998b · Woosuk Kwon · GitHub · 515080ad · 6806998b
Unverified commit 6806998b, authored Jun 26, 2024 by Woosuk Kwon; committed by GitHub on Jun 26, 2024.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

vllm/model_executor/layers/vocab_parallel_embedding.py +2 -2

No files found.
--- a/vllm/model_executor/layers/vocab_parallel_embedding.py
+++ b/vllm/model_executor/layers/vocab_parallel_embedding.py
@@ -310,7 +310,7 @@ class VocabParallelEmbedding(torch.nn.Module):
        output_parallel = F.embedding(masked_input.long(), self.weight)
        # Mask the output embedding.
        if self.tp_size > 1:
-            output_parallel.masked_fill_(input_mask.unsqueeze(1), 0)
+            output_parallel.masked_fill_(input_mask.unsqueeze(-1), 0)
        # Reduce across all the model parallel GPUs.
        output = tensor_model_parallel_all_reduce(output_parallel)
        return output