Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
2d2ed2cc
Unverified
Commit
2d2ed2cc
authored Mar 03, 2021 by Patrick von Platen
Committed by GitHub Mar 03, 2021
Browse files
[T5] Fix speed degradation bug t5 (#10496)
* fix speed degradation bug t5
* fix for all models
* fix code quality
parent
5dc303e2
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing 9 changed files with 30 additions and 11 deletions
+30
-11
src/transformers/models/bart/modeling_bart.py
src/transformers/models/bart/modeling_bart.py
+3
-1
src/transformers/models/blenderbot/modeling_blenderbot.py
src/transformers/models/blenderbot/modeling_blenderbot.py
+3
-1
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
+3
-1
src/transformers/models/led/modeling_led.py
src/transformers/models/led/modeling_led.py
+3
-1
src/transformers/models/marian/modeling_marian.py
src/transformers/models/marian/modeling_marian.py
+3
-1
src/transformers/models/mbart/modeling_mbart.py
src/transformers/models/mbart/modeling_mbart.py
+3
-1
src/transformers/models/pegasus/modeling_pegasus.py
src/transformers/models/pegasus/modeling_pegasus.py
+3
-1
src/transformers/models/t5/modeling_t5.py
src/transformers/models/t5/modeling_t5.py
+8
-3
templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
+1
-1
No files found.
src/transformers/models/bart/modeling_bart.py
View file @
2d2ed2cc
...
@@ -319,7 +319,9 @@ class BartEncoderLayer(nn.Module):
...
@@ -319,7 +319,9 @@ class BartEncoderLayer(nn.Module):
hidden_states
=
residual
+
hidden_states
hidden_states
=
residual
+
hidden_states
hidden_states
=
self
.
final_layer_norm
(
hidden_states
)
hidden_states
=
self
.
final_layer_norm
(
hidden_states
)
if
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
():
if
hidden_states
.
dtype
==
torch
.
float16
and
(
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
()
):
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
...
...
src/transformers/models/blenderbot/modeling_blenderbot.py
View file @
2d2ed2cc
...
@@ -322,7 +322,9 @@ class BlenderbotEncoderLayer(nn.Module):
...
@@ -322,7 +322,9 @@ class BlenderbotEncoderLayer(nn.Module):
hidden_states
=
F
.
dropout
(
hidden_states
,
p
=
self
.
dropout
,
training
=
self
.
training
)
hidden_states
=
F
.
dropout
(
hidden_states
,
p
=
self
.
dropout
,
training
=
self
.
training
)
hidden_states
=
residual
+
hidden_states
hidden_states
=
residual
+
hidden_states
if
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
():
if
hidden_states
.
dtype
==
torch
.
float16
and
(
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
()
):
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
...
...
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
View file @
2d2ed2cc
...
@@ -320,7 +320,9 @@ class BlenderbotSmallEncoderLayer(nn.Module):
...
@@ -320,7 +320,9 @@ class BlenderbotSmallEncoderLayer(nn.Module):
hidden_states
=
residual
+
hidden_states
hidden_states
=
residual
+
hidden_states
hidden_states
=
self
.
final_layer_norm
(
hidden_states
)
hidden_states
=
self
.
final_layer_norm
(
hidden_states
)
if
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
():
if
hidden_states
.
dtype
==
torch
.
float16
and
(
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
()
):
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
...
...
src/transformers/models/led/modeling_led.py
View file @
2d2ed2cc
...
@@ -925,7 +925,9 @@ class LEDEncoderLayer(nn.Module):
...
@@ -925,7 +925,9 @@ class LEDEncoderLayer(nn.Module):
hidden_states
=
residual
+
hidden_states
hidden_states
=
residual
+
hidden_states
hidden_states
=
self
.
final_layer_norm
(
hidden_states
)
hidden_states
=
self
.
final_layer_norm
(
hidden_states
)
if
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
():
if
hidden_states
.
dtype
==
torch
.
float16
and
(
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
()
):
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
return
(
hidden_states
,)
+
attn_outputs
[
1
:]
return
(
hidden_states
,)
+
attn_outputs
[
1
:]
...
...
src/transformers/models/marian/modeling_marian.py
View file @
2d2ed2cc
...
@@ -337,7 +337,9 @@ class MarianEncoderLayer(nn.Module):
...
@@ -337,7 +337,9 @@ class MarianEncoderLayer(nn.Module):
hidden_states
=
residual
+
hidden_states
hidden_states
=
residual
+
hidden_states
hidden_states
=
self
.
final_layer_norm
(
hidden_states
)
hidden_states
=
self
.
final_layer_norm
(
hidden_states
)
if
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
():
if
hidden_states
.
dtype
==
torch
.
float16
and
(
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
()
):
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
...
...
src/transformers/models/mbart/modeling_mbart.py
View file @
2d2ed2cc
...
@@ -326,7 +326,9 @@ class MBartEncoderLayer(nn.Module):
...
@@ -326,7 +326,9 @@ class MBartEncoderLayer(nn.Module):
hidden_states
=
F
.
dropout
(
hidden_states
,
p
=
self
.
dropout
,
training
=
self
.
training
)
hidden_states
=
F
.
dropout
(
hidden_states
,
p
=
self
.
dropout
,
training
=
self
.
training
)
hidden_states
=
residual
+
hidden_states
hidden_states
=
residual
+
hidden_states
if
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
():
if
hidden_states
.
dtype
==
torch
.
float16
and
(
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
()
):
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
...
...
src/transformers/models/pegasus/modeling_pegasus.py
View file @
2d2ed2cc
...
@@ -337,7 +337,9 @@ class PegasusEncoderLayer(nn.Module):
...
@@ -337,7 +337,9 @@ class PegasusEncoderLayer(nn.Module):
hidden_states
=
F
.
dropout
(
hidden_states
,
p
=
self
.
dropout
,
training
=
self
.
training
)
hidden_states
=
F
.
dropout
(
hidden_states
,
p
=
self
.
dropout
,
training
=
self
.
training
)
hidden_states
=
residual
+
hidden_states
hidden_states
=
residual
+
hidden_states
if
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
():
if
hidden_states
.
dtype
==
torch
.
float16
and
(
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
()
):
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
...
...
src/transformers/models/t5/modeling_t5.py
View file @
2d2ed2cc
...
@@ -643,7 +643,7 @@ class T5Block(nn.Module):
...
@@ -643,7 +643,7 @@ class T5Block(nn.Module):
attention_outputs
=
self_attention_outputs
[
2
:]
# Keep self-attention outputs and relative position weights
attention_outputs
=
self_attention_outputs
[
2
:]
# Keep self-attention outputs and relative position weights
# clamp inf values to enable fp16 training
# clamp inf values to enable fp16 training
if
torch
.
isinf
(
hidden_states
).
any
():
if
hidden_states
.
dtype
==
torch
.
float16
and
torch
.
isinf
(
hidden_states
).
any
():
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
...
@@ -668,7 +668,9 @@ class T5Block(nn.Module):
...
@@ -668,7 +668,9 @@ class T5Block(nn.Module):
output_attentions
=
output_attentions
,
output_attentions
=
output_attentions
,
)
)
hidden_states
=
cross_attention_outputs
[
0
]
hidden_states
=
cross_attention_outputs
[
0
]
if
torch
.
isinf
(
hidden_states
).
any
():
# clamp inf values to enable fp16 training
if
hidden_states
.
dtype
==
torch
.
float16
and
torch
.
isinf
(
hidden_states
).
any
():
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
...
@@ -681,9 +683,12 @@ class T5Block(nn.Module):
...
@@ -681,9 +683,12 @@ class T5Block(nn.Module):
# Apply Feed Forward layer
# Apply Feed Forward layer
hidden_states
=
self
.
layer
[
-
1
](
hidden_states
)
hidden_states
=
self
.
layer
[
-
1
](
hidden_states
)
if
torch
.
isinf
(
hidden_states
).
any
():
# clamp inf values to enable fp16 training
if
hidden_states
.
dtype
==
torch
.
float16
and
torch
.
isinf
(
hidden_states
).
any
():
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
outputs
=
(
hidden_states
,)
outputs
=
(
hidden_states
,)
outputs
=
outputs
+
(
present_key_value_state
,)
+
attention_outputs
outputs
=
outputs
+
(
present_key_value_state
,)
+
attention_outputs
...
...
templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
View file @
2d2ed2cc
...
@@ -1824,7 +1824,7 @@ class {{cookiecutter.camelcase_modelname}}EncoderLayer(nn.Module):
...
@@ -1824,7 +1824,7 @@ class {{cookiecutter.camelcase_modelname}}EncoderLayer(nn.Module):
hidden_states
=
residual
+
hidden_states
hidden_states
=
residual
+
hidden_states
hidden_states
=
self
.
final_layer_norm
(
hidden_states
)
hidden_states
=
self
.
final_layer_norm
(
hidden_states
)
if
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
():
if
hidden_states
.
dtype
==
torch
.
float16
and
(
torch
.
isinf
(
hidden_states
).
any
()
or
torch
.
isnan
(
hidden_states
).
any
()
)
:
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
clamp_value
=
torch
.
finfo
(
hidden_states
.
dtype
).
max
-
1000
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
hidden_states
=
torch
.
clamp
(
hidden_states
,
min
=-
clamp_value
,
max
=
clamp_value
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment