Unverified commit 2e7cb46f, authored by Arthur, committed by GitHub
Browse files

[`cleanup`] vestiges of causal mask (#29806)

nit
parent 884b2215
...@@ -825,12 +825,6 @@ class CohereModel(CoherePreTrainedModel): ...@@ -825,12 +825,6 @@ class CohereModel(CoherePreTrainedModel):
self.norm = CohereLayerNorm(config.hidden_size, eps=config.layer_norm_eps) self.norm = CohereLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
self.gradient_checkpointing = False self.gradient_checkpointing = False
# Register a causal mask to separate causal and padding mask creation. Merging happens in the attention class.
# NOTE: This is not friendly with TorchScript, ONNX, ExportedProgram serialization for very large `max_position_embeddings`.
causal_mask = torch.full(
(config.max_position_embeddings, config.max_position_embeddings), fill_value=True, dtype=torch.bool
)
self.register_buffer("causal_mask", torch.triu(causal_mask, diagonal=1), persistent=False)
# Initialize weights and apply final processing # Initialize weights and apply final processing
self.post_init() self.post_init()
......
...@@ -719,10 +719,6 @@ class GemmaPreTrainedModel(PreTrainedModel): ...@@ -719,10 +719,6 @@ class GemmaPreTrainedModel(PreTrainedModel):
"make sure to use `sdpa` in the mean time, and open an issue at https://github.com/huggingface/transformers" "make sure to use `sdpa` in the mean time, and open an issue at https://github.com/huggingface/transformers"
) )
if max_cache_len > self.model.causal_mask.shape[-1] or self.device != self.model.causal_mask.device:
causal_mask = torch.full((max_cache_len, max_cache_len), fill_value=1, device=self.device)
self.register_buffer("causal_mask", torch.triu(causal_mask, diagonal=1), persistent=False)
for layer in self.model.layers: for layer in self.model.layers:
weights = layer.self_attn.o_proj.weight weights = layer.self_attn.o_proj.weight
layer.self_attn.past_key_value = cache_cls( layer.self_attn.past_key_value = cache_cls(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment