Commit 35d32308 authored by thomwolf

adding back final dropout in T5

parent e74c73a8
@@ -629,6 +629,7 @@ class T5Stack(T5PreTrainedModel):
                 all_attentions = all_attentions + (layer_outputs[1],)  # We keep only self-attention weights for now
 
         hidden_states = self.final_layer_norm(hidden_states)
+        hidden_states = self.dropout(hidden_states)
 
         # Add last layer
         if self.output_hidden_states:
...
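
For context, here is a minimal runnable sketch of what the restored line does at the end of the stack's forward pass. The class name TinyStackTail, the d_model of 512, and the use of nn.LayerNorm are illustrative stand-ins (T5 actually uses its own RMS-style T5LayerNorm); only the final_layer_norm followed by dropout ordering mirrors the diff above.

import torch
from torch import nn

# Sketch of just the tail of T5Stack.forward(): every T5 block applies
# dropout internally, and this commit restores dropout on the stack's
# final, layer-normed hidden states as well.
class TinyStackTail(nn.Module):
    def __init__(self, d_model: int = 512, dropout_rate: float = 0.1):
        super().__init__()
        self.final_layer_norm = nn.LayerNorm(d_model)  # stand-in for T5LayerNorm
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.final_layer_norm(hidden_states)
        # The line added back by this commit: without it, the stack's
        # output skipped the final regularization step.
        hidden_states = self.dropout(hidden_states)
        return hidden_states

x = torch.randn(2, 8, 512)  # (batch, seq_len, d_model)
print(TinyStackTail()(x).shape)  # torch.Size([2, 8, 512])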