"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "0731fa158739fceebe24aec007d71bcd0c420cc9"
Unverified Commit 675d2a5a authored by Wang, Yi's avatar Wang, Yi Committed by GitHub
Browse files

Fix AutoTP in DeepSpeed not working for BLOOM (#22196)



* fix AutoTP in deepspeed could not work for bloom
Signed-off-by: default avatarWang, Yi A <yi.a.wang@intel.com>

* add a method in BloomModel to build the alibi tensor
Signed-off-by: default avatarWang, Yi A <yi.a.wang@intel.com>

---------
Signed-off-by: default avatarWang, Yi A <yi.a.wang@intel.com>
parent 00934026
......@@ -641,6 +641,9 @@ class BloomModel(BloomPreTrainedModel):
# Initialize weights and apply final processing
self.post_init()
def build_alibi_tensor(self, attention_mask: torch.Tensor, num_heads: int, dtype: torch.dtype) -> torch.Tensor:
    """Build the ALiBi bias tensor for the given attention mask.

    Thin instance-method wrapper over the module-level ``build_alibi_tensor``
    helper; exposing it on the model lets frameworks (e.g. DeepSpeed AutoTP,
    per the commit message) patch or override the construction per model
    instance without touching the free function.

    Args:
        attention_mask: mask tensor passed through to the helper unchanged.
        num_heads: number of attention heads the bias is built for.
        dtype: desired dtype of the returned tensor.

    Returns:
        The tensor produced by the module-level ``build_alibi_tensor``.
    """
    alibi = build_alibi_tensor(attention_mask, num_heads, dtype)
    return alibi
def get_input_embeddings(self):
    """Return the module used to embed input token ids.

    Accessor required by the base-model API; simply hands back the
    ``word_embeddings`` attribute stored on the instance.
    """
    embeddings = self.word_embeddings
    return embeddings
......@@ -750,7 +753,7 @@ class BloomModel(BloomPreTrainedModel):
else:
attention_mask = attention_mask.to(hidden_states.device)
alibi = build_alibi_tensor(attention_mask, self.num_heads, dtype=hidden_states.dtype)
alibi = self.build_alibi_tensor(attention_mask, self.num_heads, dtype=hidden_states.dtype)
causal_mask = self._prepare_attn_mask(
attention_mask,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment